def run():
    options = parse_options()
    print(options)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    os.makedirs(options.data_dir, exist_ok=True)
    os.makedirs(options.output_dir, exist_ok=True)
    os.makedirs(options.model_dir, exist_ok=True)

    with open(os.path.join(options.output_dir, 'options.json'), 'w') as f:
        json.dump(vars(options), f, indent=4)

    if options.restore:
        generator = torch.load(os.path.join(options.model_dir, 'generator.pt'))
        critic = torch.load(os.path.join(options.model_dir, 'critic.pt'))
    else:
        generator = Generator(options.image_size, options.state_size)
        critic = Critic(options.image_size)
        generator.apply(init_weights)
        critic.apply(init_weights)

    generator = generator.to(device)
    critic = critic.to(device)

    transform = transforms.Compose([
        transforms.Resize((options.image_size, options.image_size)),
        transforms.CenterCrop(options.image_size),  # redundant?
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    if options.dataset == 'lsun':
        training_class = options.image_class + '_train'
        dataset = datasets.LSUN(options.data_dir, classes=[training_class], transform=transform)
    else:
        dataset = datasets.ImageFolder(root=options.data_dir, transform=transform)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=options.batch_size,
                                             num_workers=4,
                                             shuffle=True,
                                             drop_last=True,
                                             pin_memory=True)

    train(generator, critic, dataloader, device, options)
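# A minimal sketch of the init_weights hook applied above; init_weights is not
# defined in this file, so the DCGAN-style N(0, 0.02) initialisation shown here
# is an assumption about what it does, not the repository's actual implementation.
def init_weights_sketch(module):
    classname = module.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.normal_(module.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        torch.nn.init.normal_(module.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(module.bias.data, 0.0)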
class TD3:
    def __init__(self, env, state_dim, action_dim, max_action, gamma=0.99,
                 tau=0.005, policy_noise=0.2, noise_clip=0.5, policy_freq=2):
        self.actor = Actor(state_dim, action_dim)
        self.actor_target = Actor(state_dim, action_dim)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=1e-3)

        self.critic = Critic(state_dim, action_dim)
        self.critic_target = Critic(state_dim, action_dim)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=1e-3)

        self.max_action = max_action
        self.gamma = gamma
        self.tau = tau
        self.policy_noise = policy_noise
        self.noise_clip = noise_clip
        self.policy_freq = policy_freq

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.actor.to(self.device)
        self.actor_target.to(self.device)
        self.critic.to(self.device)
        self.critic_target.to(self.device)

        self.env = env
        self.total_it = 0

    def select_action(self, state, noise=0.1):
        action = self.actor(state.to(self.device)).data.cpu().numpy().flatten()
        if noise != 0:
            action = action + np.random.normal(0, noise, size=self.env.action_space.shape[0])
        return action.clip(self.env.action_space.low, self.env.action_space.high)

    def train(self, replay_buffer, batch_size=128):
        self.total_it += 1
        states, states_, actions, rewards, terminal = replay_buffer.sample_buffer(batch_size)

        with torch.no_grad():
            # target policy smoothing: add clipped noise to the target action
            noise = (torch.randn_like(actions.to(self.device)) *
                     self.policy_noise).clamp(-self.noise_clip, self.noise_clip)
            next_action = (self.actor_target(states_.to(self.device)) +
                           noise).clamp(-self.max_action, self.max_action)

            # compute the target Q value (clipped double Q-learning)
            target_q1, target_q2 = self.critic_target(states_.to(self.device),
                                                      next_action.to(self.device))
            target_q = torch.min(target_q1, target_q2)
            target_q = rewards + terminal * self.gamma * target_q[:, 0].cpu()

        # get current Q estimates
        current_q1, current_q2 = self.critic(states.to(self.device),
                                             actions.to(self.device))

        # compute critic loss
        critic_loss = F.mse_loss(current_q1[:, 0], target_q.to(self.device)) + \
            F.mse_loss(current_q2[:, 0], target_q.to(self.device))

        # optimize the critic
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # delayed policy updates
        if self.total_it % self.policy_freq == 0:
            # compute actor loss
            actor_loss = -self.critic.q1(states.to(self.device),
                                         self.actor(states.to(self.device))).mean()

            # optimize the actor
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()

            # update the frozen target models
            for param, target_param in zip(self.critic.parameters(),
                                           self.critic_target.parameters()):
                target_param.data.copy_(self.tau * param.data +
                                        (1 - self.tau) * target_param.data)
            for param, target_param in zip(self.actor.parameters(),
                                           self.actor_target.parameters()):
                target_param.data.copy_(self.tau * param.data +
                                        (1 - self.tau) * target_param.data)

    def save(self, filename):
        torch.save(self.critic.state_dict(), filename + "_critic")
        torch.save(self.critic_optimizer.state_dict(), filename + "_critic_optimizer")
        torch.save(self.actor.state_dict(), filename + "_actor")
        torch.save(self.actor_optimizer.state_dict(), filename + "_actor_optimizer")

    def load(self, filename):
        self.critic.load_state_dict(torch.load(filename + "_critic"))
        self.critic_optimizer.load_state_dict(torch.load(filename + "_critic_optimizer"))
        self.actor.load_state_dict(torch.load(filename + "_actor"))
        self.actor_optimizer.load_state_dict(torch.load(filename + "_actor_optimizer"))
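# A minimal sketch of a replay buffer matching the sample_buffer() interface used by
# TD3.train() above; the actual buffer class is not shown in this file, so the field
# order and the "terminal" convention (1.0 = non-terminal mask) are assumptions.
import numpy as np
import torch


class ReplayBufferSketch:
    def __init__(self, max_size, state_dim, action_dim):
        self.max_size = max_size
        self.ptr = 0
        self.size = 0
        self.states = np.zeros((max_size, state_dim), dtype=np.float32)
        self.next_states = np.zeros((max_size, state_dim), dtype=np.float32)
        self.actions = np.zeros((max_size, action_dim), dtype=np.float32)
        self.rewards = np.zeros(max_size, dtype=np.float32)
        self.not_done = np.zeros(max_size, dtype=np.float32)

    def store_transition(self, state, action, reward, next_state, done):
        self.states[self.ptr] = state
        self.actions[self.ptr] = action
        self.rewards[self.ptr] = reward
        self.next_states[self.ptr] = next_state
        self.not_done[self.ptr] = 1.0 - float(done)
        self.ptr = (self.ptr + 1) % self.max_size
        self.size = min(self.size + 1, self.max_size)

    def sample_buffer(self, batch_size):
        # sample a random batch and return torch tensors in the order expected by TD3.train()
        idx = np.random.randint(0, self.size, size=batch_size)
        return (torch.from_numpy(self.states[idx]),
                torch.from_numpy(self.next_states[idx]),
                torch.from_numpy(self.actions[idx]),
                torch.from_numpy(self.rewards[idx]),
                torch.from_numpy(self.not_done[idx]))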
class DDPGAgent:
    """
    Encapsulates the functioning of the DDPG agent
    """

    def __init__(self, state_dim, action_dim, max_action, device, memory_capacity=10000,
                 discount=0.99, tau=0.005, sigma=0.2, theta=0.15, actor_lr=1e-4,
                 critic_lr=1e-3, train_mode=True):
        self.train_mode = train_mode  # whether the agent is in training or testing mode

        self.state_dim = state_dim  # dimension of the state space
        self.action_dim = action_dim  # dimension of the action space
        self.device = device  # defines which cuda or cpu device is to be used to run the networks
        self.discount = discount  # denoted as gamma in the equation for computation of the Q-value
        self.tau = tau  # defines the factor used for Polyak averaging (i.e., soft updating of the target networks)
        self.max_action = max_action  # the max value of the range in the action space (assumes a symmetric range in the action space)

        # create an instance of the replay buffer
        self.memory = ReplayMemory(memory_capacity)

        # create an instance of the noise generating process
        self.ou_noise = OrnsteinUhlenbeckNoise(mu=np.zeros(self.action_dim), sigma=sigma, theta=theta)

        # instances of the networks for the actor and the critic
        self.actor = Actor(state_dim, action_dim, max_action, actor_lr)
        self.critic = Critic(state_dim, action_dim, critic_lr)

        # instances of the target networks for the actor and the critic
        self.target_actor = Actor(state_dim, action_dim, max_action, actor_lr)
        self.target_critic = Critic(state_dim, action_dim, critic_lr)

        # initialise the targets to the same weights as their corresponding current networks
        self.target_actor.load_state_dict(self.actor.state_dict())
        self.target_critic.load_state_dict(self.critic.state_dict())

        # since we do not learn/train on the target networks
        self.target_actor.eval()
        self.target_critic.eval()

        # for test mode
        if not self.train_mode:
            self.actor.eval()
            self.critic.eval()
            self.ou_noise = None

        self.actor.to(self.device)
        self.critic.to(self.device)
        self.target_actor.to(self.device)
        self.target_critic.to(self.device)

    def select_action(self, state):
        """
        Function to return the appropriate action for the given state.
        During training, it adds a zero-mean OU noise to the action to encourage exploration.
        During testing, no noise is added to the action decision.

        Parameters
        ---
        state: vector or tensor
            The current state of the environment as observed by the agent

        Returns
        ---
        A numpy array representing the noisy action to be performed by the agent in the current state
        """

        if not torch.is_tensor(state):
            state = torch.tensor([state], dtype=torch.float32).to(self.device)

        self.actor.eval()
        act = self.actor(state).cpu().data.numpy().flatten()  # performs inference using the actor based on the current state as the input and returns the corresponding np array
        self.actor.train()

        noise = 0.0

        ## for adding Gaussian noise (to use, update the code to pass the exploration noise as input)
        # if self.train_mode:
        #     noise = np.random.normal(0.0, exploration_noise, size=act.shape)  # generate the zero-mean gaussian noise with standard deviation determined by exploration_noise

        # for adding OU noise
        if self.train_mode:
            noise = self.ou_noise.generate_noise()

        noisy_action = act + noise
        noisy_action = noisy_action.clip(min=-self.max_action, max=self.max_action)  # to ensure that the noisy action being returned is within the limit of "legal" actions afforded to the agent; assumes action range is symmetric

        return noisy_action

    def learn(self, batchsize):
        """
        Function to perform the updates on the 4 neural networks that run the DDPG algorithm.

        Parameters
        ---
        batchsize: int
            Number of experiences to be randomly sampled from the memory for the agent to learn from

        Returns
        ---
        none
        """

        if len(self.memory) < batchsize:
            return

        states, actions, next_states, rewards, dones = self.memory.sample(batchsize, self.device)  # a batch of experiences randomly sampled from the memory

        # ensure that the actions and rewards tensors have the appropriate shapes
        actions = actions.view(-1, self.action_dim)
        rewards = rewards.view(-1, 1)

        with torch.no_grad():
            # generate target actions
            target_action = self.target_actor(next_states)

            # calculate TD-Target
            target_q = self.target_critic(next_states, target_action)
            target_q[dones] = 0.0  # being in a terminal state implies there are no more future states that the agent would encounter in the given episode, so set the associated Q-value to 0
            y = rewards + self.discount * target_q

        current_q = self.critic(states, actions)

        critic_loss = F.mse_loss(current_q, y)
        self.critic.optimizer.zero_grad()
        critic_loss.backward()
        self.critic.optimizer.step()

        # actor loss is calculated by gradient ascent along the critic, thus the negative sign is applied to convert it to a gradient descent
        pred_current_actions = self.actor(states)
        pred_current_q = self.critic(states, pred_current_actions)
        actor_loss = -pred_current_q.mean()

        self.actor.optimizer.zero_grad()
        actor_loss.backward()
        self.actor.optimizer.step()

        # apply slow-update to the target networks
        self.soft_update_targets()

    def soft_update_net(self, source_net_params, target_net_params):
        """
        Function to perform Polyak averaging to update the parameters of the provided network

        Parameters
        ---
        source_net_params: list
            trainable parameters of the source, i.e. current version of the network
        target_net_params: list
            trainable parameters of the corresponding target network

        Returns
        ---
        none
        """

        for source_param, target_param in zip(source_net_params, target_net_params):
            target_param.data.copy_(self.tau * source_param.data + (1 - self.tau) * target_param.data)

    def soft_update_targets(self):
        """
        Function that calls Polyak averaging on both target networks

        Parameters
        ---
        none

        Returns
        ---
        none
        """

        self.soft_update_net(self.actor.parameters(), self.target_actor.parameters())
        self.soft_update_net(self.critic.parameters(), self.target_critic.parameters())

    def save(self, path, model_name):
        """
        Function to save the actor and critic networks

        Parameters
        ---
        path: str
            Location where the model is to be saved
        model_name: str
            Name of the model

        Returns
        ---
        none
        """

        self.actor.save_model('{}/{}_actor'.format(path, model_name))
        self.critic.save_model('{}/{}_critic'.format(path, model_name))

    def load(self, path, model_name):
        """
        Function to load the actor and critic networks

        Parameters
        ---
        path: str
            Location where the model is saved
        model_name: str
            Name of the model

        Returns
        ---
        none
        """

        self.actor.load_model('{}/{}_actor'.format(path, model_name))
        self.critic.load_model('{}/{}_critic'.format(path, model_name))
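# A minimal sketch of the Ornstein-Uhlenbeck noise process used by DDPGAgent above;
# the OrnsteinUhlenbeckNoise class is not defined in this file, so the dt/x0 defaults
# here are assumptions, while the constructor arguments (mu, sigma, theta) and the
# generate_noise() name follow how the class is used above.
import numpy as np


class OrnsteinUhlenbeckNoiseSketch:
    def __init__(self, mu, sigma=0.2, theta=0.15, dt=1e-2, x0=None):
        self.mu = mu
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        self.x_prev = x0 if x0 is not None else np.zeros_like(mu)

    def generate_noise(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        x = (self.x_prev + self.theta * (self.mu - self.x_prev) * self.dt +
             self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape))
        self.x_prev = x
        return x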
class TD3Agent:
    """
    Encapsulates the functioning of the TD3 agent
    """

    def __init__(self, state_dim, action_dim, max_action, device, memory_capacity=10000,
                 discount=0.99, update_freq=2, tau=0.005, policy_noise_std=0.2,
                 policy_noise_clip=0.5, actor_lr=1e-3, critic_lr=1e-3, train_mode=True):
        self.train_mode = train_mode  # whether the agent is in training or testing mode

        self.state_dim = state_dim  # dimension of the state space
        self.action_dim = action_dim  # dimension of the action space
        self.device = device  # defines which cuda or cpu device is to be used to run the networks
        self.discount = discount  # denoted as gamma in the equation for computation of the Q-value
        self.update_freq = update_freq  # defines how frequently the actor and the targets should be updated
        self.tau = tau  # defines the factor used for Polyak averaging (i.e., soft updating of the target networks)
        self.max_action = max_action  # the max value of the range in the action space (assumes a symmetric range in the action space)
        self.policy_noise_clip = policy_noise_clip  # max range within which the noise for target policy smoothing must be contained
        self.policy_noise_std = policy_noise_std  # standard deviation, i.e. sigma, of the Gaussian noise applied for target policy smoothing

        # create an instance of the replay buffer
        self.memory = ReplayMemory(memory_capacity)

        # instances of the networks for the actor and the two critics
        self.actor = Actor(state_dim, action_dim, max_action, actor_lr)
        self.critic = Critic(state_dim, action_dim, critic_lr)  # the critic class encapsulates two copies of the neural network for the two critics used in TD3

        # instances of the target networks for the actor and the two critics
        self.target_actor = Actor(state_dim, action_dim, max_action, actor_lr)
        self.target_critic = Critic(state_dim, action_dim, critic_lr)

        # initialise the targets to the same weights as their corresponding current networks
        self.target_actor.load_state_dict(self.actor.state_dict())
        self.target_critic.load_state_dict(self.critic.state_dict())

        # since we do not learn/train on the target networks
        self.target_actor.eval()
        self.target_critic.eval()

        # for test mode
        if not self.train_mode:
            self.actor.eval()
            self.critic.eval()

        self.actor.to(self.device)
        self.critic.to(self.device)
        self.target_actor.to(self.device)
        self.target_critic.to(self.device)

    def select_action(self, state, exploration_noise=0.1):
        """
        Function to return the appropriate action for the given state.
        During training, it adds a zero-mean Gaussian noise with std=exploration_noise
        to the action to encourage exploration. No noise is added to the action decision
        during testing mode.

        Parameters
        ---
        state: vector or tensor
            The current state of the environment as observed by the agent
        exploration_noise: float, optional
            Standard deviation, i.e. sigma, of the Gaussian noise to be added to the agent's action to encourage exploration

        Returns
        ---
        A numpy array representing the noisy action to be performed by the agent in the current state
        """

        if not torch.is_tensor(state):
            state = torch.tensor([state], dtype=torch.float32).to(self.device)

        act = self.actor(state).cpu().data.numpy().flatten()  # performs inference using the actor based on the current state as the input and returns the corresponding np array

        if not self.train_mode:
            exploration_noise = 0.0  # since we do not need noise to be added to the action during testing

        noise = np.random.normal(0.0, exploration_noise, size=act.shape)  # generate the zero-mean gaussian noise with standard deviation determined by exploration_noise

        noisy_action = act + noise
        noisy_action = noisy_action.clip(min=-self.max_action, max=self.max_action)  # to ensure that the noisy action being returned is within the limit of "legal" actions afforded to the agent; assumes action range is symmetric

        return noisy_action

    def learn(self, current_iteration, batchsize):
        """
        Function to perform the updates on the 6 neural networks that run the TD3 algorithm.

        Parameters
        ---
        current_iteration: int
            Total number of steps that have been performed by the agent
        batchsize: int
            Number of experiences to be randomly sampled from the memory for the agent to learn from

        Returns
        ---
        none
        """

        if len(self.memory) < batchsize:
            return

        states, actions, next_states, rewards, dones = self.memory.sample(batchsize, self.device)  # a batch of experiences randomly sampled from the memory

        # ensure that the actions and rewards tensors have the appropriate shapes
        actions = actions.view(-1, self.action_dim)
        rewards = rewards.view(-1, 1)

        # generate noisy target actions for target policy smoothing
        pred_action = self.target_actor(next_states)
        noise = torch.zeros_like(pred_action).normal_(0, self.policy_noise_std).to(self.device)
        noise = torch.clamp(noise, min=-self.policy_noise_clip, max=self.policy_noise_clip)
        noisy_pred_action = torch.clamp(pred_action + noise, min=-self.max_action, max=self.max_action)

        # calculate TD-Target using Clipped Double Q-learning
        target_q1, target_q2 = self.target_critic(next_states, noisy_pred_action)
        target_q = torch.min(target_q1, target_q2)
        target_q[dones] = 0.0  # being in a terminal state implies there are no more future states that the agent would encounter in the given episode, so set the associated Q-value to 0
        y = rewards + self.discount * target_q

        current_q1, current_q2 = self.critic(states, actions)  # the critic class encapsulates two copies of the neural network, thereby returning two Q values with each forward pass

        critic_loss = F.mse_loss(current_q1, y) + F.mse_loss(current_q2, y)  # the losses of the two critics are added as there is only one optimiser shared between the two networks

        self.critic.optimizer.zero_grad()
        critic_loss.backward()
        self.critic.optimizer.step()

        # delayed policy and target updates
        if current_iteration % self.update_freq == 0:
            # actor loss is calculated by gradient ascent along critic 1, thus the negative sign is applied to convert it to a gradient descent
            pred_current_actions = self.actor(states)
            pred_current_q1, _ = self.critic(states, pred_current_actions)  # since we only need the Q-value from critic 1, we can ignore the second value obtained through the forward pass
            actor_loss = -pred_current_q1.mean()

            self.actor.optimizer.zero_grad()
            actor_loss.backward()
            self.actor.optimizer.step()

            # apply slow-update to all three target networks
            self.soft_update_targets()

    def soft_update_net(self, source_net_params, target_net_params):
        """
        Function to perform Polyak averaging to update the parameters of the provided network

        Parameters
        ---
        source_net_params: list
            trainable parameters of the source, i.e. current version of the network
        target_net_params: list
            trainable parameters of the corresponding target network

        Returns
        ---
        none
        """

        for source_param, target_param in zip(source_net_params, target_net_params):
            target_param.data.copy_(self.tau * source_param.data + (1 - self.tau) * target_param.data)

    def soft_update_targets(self):
        """
        Function that calls Polyak averaging on all three target networks

        Parameters
        ---
        none

        Returns
        ---
        none
        """

        self.soft_update_net(self.actor.parameters(), self.target_actor.parameters())
        self.soft_update_net(self.critic.parameters(), self.target_critic.parameters())

    def save(self, path, model_name):
        """
        Function to save the actor and critic networks

        Parameters
        ---
        path: str
            Location where the model is to be saved
        model_name: str
            Name of the model

        Returns
        ---
        none
        """

        self.actor.save_model('{}/{}_actor'.format(path, model_name))
        self.critic.save_model('{}/{}_critic'.format(path, model_name))

    def load(self, path, model_name):
        """
        Function to load the actor and critic networks

        Parameters
        ---
        path: str
            Location where the model is saved
        model_name: str
            Name of the model

        Returns
        ---
        none
        """

        self.actor.load_model('{}/{}_actor'.format(path, model_name))
        self.critic.load_model('{}/{}_critic'.format(path, model_name))
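# A minimal sketch of the Actor network interface assumed by DDPGAgent and TD3Agent
# above: the constructor takes (state_dim, action_dim, max_action, lr), the network
# owns its optimizer (self.actor.optimizer is used in learn()), and it provides
# save_model()/load_model(). The layer sizes and tanh output scaling are assumptions;
# the repository's actual Actor and Critic classes are not shown in this file.
import torch
import torch.nn as nn
import torch.optim as optim


class ActorSketch(nn.Module):
    def __init__(self, state_dim, action_dim, max_action, lr):
        super().__init__()
        self.max_action = max_action
        self.net = nn.Sequential(
            nn.Linear(state_dim, 256), nn.ReLU(),
            nn.Linear(256, 256), nn.ReLU(),
            nn.Linear(256, action_dim), nn.Tanh(),
        )
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

    def forward(self, state):
        # scale the tanh output to the symmetric action range [-max_action, max_action]
        return self.max_action * self.net(state)

    def save_model(self, filename):
        torch.save(self.state_dict(), filename)

    def load_model(self, filename):
        self.load_state_dict(torch.load(filename))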
def adversarial_debiasing(model_state_dict, data, config, device):
    logger.info('Training Adversarial model.')
    actor = load_model(data.num_features, config.get('hyperparameters', {}))
    actor.load_state_dict(model_state_dict)
    actor.to(device)

    hid = config['hyperparameters']['hid'] if 'hyperparameters' in config else 32
    critic = Critic(hid * config['adversarial']['batch_size'],
                    num_deep=config['adversarial']['num_deep'],
                    hid=hid)
    critic.to(device)
    critic_optimizer = optim.Adam(critic.parameters())
    critic_loss_fn = torch.nn.MSELoss()

    actor_optimizer = optim.Adam(actor.parameters(), lr=config['adversarial']['lr'])
    actor_loss_fn = torch.nn.BCELoss()

    for epoch in range(config['adversarial']['epochs']):
        # train the critic to predict the actor's bias while the actor is frozen
        for param in critic.parameters():
            param.requires_grad = True
        for param in actor.parameters():
            param.requires_grad = False
        actor.eval()
        critic.train()

        for step in range(config['adversarial']['critic_steps']):
            critic_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0), (config['adversarial']['batch_size'],))
            cX_valid = data.X_valid_gpu[indices]
            cy_valid = data.y_valid[indices]
            cp_valid = data.p_valid[indices]
            with torch.no_grad():
                scores = actor(cX_valid)[:, 0].reshape(-1).cpu().numpy()
            bias = compute_bias(scores, cy_valid.numpy(), cp_valid, config['metric'])
            res = critic(actor.trunc_forward(cX_valid))
            loss = critic_loss_fn(torch.tensor([bias], device=device), res[0])
            loss.backward()
            train_loss = loss.item()
            critic_optimizer.step()
            if (epoch % 10 == 0) and (step % 100 == 0):
                logger.info(f'=======> Critic Epoch: {(epoch, step)} loss: {train_loss}')

        # train the actor against the frozen critic's bias estimate
        for param in critic.parameters():
            param.requires_grad = False
        for param in actor.parameters():
            param.requires_grad = True
        actor.train()
        critic.eval()

        for step in range(config['adversarial']['actor_steps']):
            actor_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0), (config['adversarial']['batch_size'],))
            cy_valid = data.y_valid_gpu[indices]
            cX_valid = data.X_valid_gpu[indices]
            pred_bias = critic(actor.trunc_forward(cX_valid))
            bceloss = actor_loss_fn(actor(cX_valid)[:, 0], cy_valid)
            # loss = lam*abs(pred_bias) + (1-lam)*loss
            objloss = max(
                1,
                config['adversarial']['lambda'] *
                (abs(pred_bias[0][0]) - config['objective']['epsilon'] +
                 config['adversarial']['margin']) + 1) * bceloss
            objloss.backward()
            train_loss = objloss.item()
            actor_optimizer.step()
            if (epoch % 10 == 0) and (step % 100 == 0):
                logger.info(f'=======> Actor Epoch: {(epoch, step)} loss: {train_loss}')

        if epoch % 10 == 0:
            with torch.no_grad():
                scores = actor(data.X_valid_gpu)[:, 0].reshape(-1, 1).cpu().numpy()
            _, best_adv_obj = get_best_thresh(scores, np.linspace(0, 1, 1001), data, config,
                                              valid=False, margin=config['adversarial']['margin'])
            logger.info(f'Objective: {best_adv_obj}')

    logger.info('Finding optimal threshold for Adversarial model.')
    with torch.no_grad():
        scores = actor(data.X_valid_gpu)[:, 0].reshape(-1, 1).cpu().numpy()
    best_adv_thresh, _ = get_best_thresh(scores, np.linspace(0, 1, 1001), data, config,
                                         valid=False, margin=config['adversarial']['margin'])

    logger.info('Evaluating Adversarial model on best threshold.')
    with torch.no_grad():
        labels = (actor(data.X_valid_gpu)[:, 0] > best_adv_thresh).reshape(-1, 1).cpu().numpy()
    results_valid = get_valid_objective(labels, data, config)
    logger.info(f'Results: {results_valid}')

    with torch.no_grad():
        labels = (actor(data.X_test_gpu)[:, 0] > best_adv_thresh).reshape(-1, 1).cpu().numpy()
    results_test = get_test_objective(labels, data, config)

    return results_valid, results_test
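# An illustrative sketch of the kind of group-fairness quantity compute_bias() above
# might return. The repository's actual compute_bias() and the options behind
# config['metric'] are not shown in this file, so this demographic-parity example is
# purely an assumption for illustration, not the project's metric.
import numpy as np


def demographic_parity_difference_sketch(scores, protected, thresh=0.5):
    """Difference in positive-prediction rates between the two protected groups."""
    preds = scores > thresh
    rate_a = preds[protected == 1].mean()
    rate_b = preds[protected == 0].mean()
    return rate_a - rate_b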