Example #1
    def __init__(self,
                 params,
                 get_embeddings=True,
                 use_batchnorm=True,
                 use_dropout=True,
                 use_fm_second_order=False):
        super(xDeepFM, self).__init__()
        self.device = params['device']
        self.mlp_input_dim = params['field_size'] * params['embedding_size']
        self.use_fm_second_order = use_fm_second_order

        self.first_order = FirstOrder(params)
        self.second_order = SecondOrder(params, get_embeddings=get_embeddings)
        self.mlp = MLP(params,
                       use_batchnorm=use_batchnorm,
                       use_dropout=use_dropout)
        self.cin = CIN(params)
        if params['split_half']:
            cin_output_size = reduce(lambda x, y: x // 2 + y // 2,
                                     params['cin_hidden_dims'])
        else:
            cin_output_size = sum(params['cin_hidden_dims'])

        if self.use_fm_second_order:
            concat_size = (params['field_size'] + params['embedding_size'] +
                           params['hidden_dims'][-1] + cin_output_size)
        else:
            concat_size = (params['field_size'] + params['hidden_dims'][-1] +
                           cin_output_size)

        self.concat_layer = nn.Linear(concat_size, 1).to(self.device)
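
# A minimal sketch (not from the source) of the `params` dict this __init__ reads.
# Only the keys accessed above are shown, the values are illustrative, and the
# FirstOrder / SecondOrder / MLP / CIN submodules may require additional keys.
xdeepfm_params_example = {
    'device': 'cpu',              # torch device for the final concat layer
    'field_size': 10,             # number of feature fields
    'embedding_size': 8,          # embedding dimension per field
    'hidden_dims': [64, 32],      # MLP widths; the last one feeds the concat layer
    'cin_hidden_dims': [64, 64],  # CIN layer widths
    'split_half': True,           # halve each CIN layer's contribution to the output
}
# With these values and use_fm_second_order=False, concat_size works out to
# 10 + 32 + (64 // 2 + 64 // 2) = 106.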
Example #2
    def __init__(self,
                 params,
                 get_embeddings=True,
                 use_batchnorm=True,
                 use_dropout=True,
                 use_fm=True,
                 use_deep=True):
        super(DeepFM, self).__init__()
        self.device = params['device']
        self.mlp_input_dim = params['field_size'] * params['embedding_size']
        self.use_fm = use_fm
        self.use_deep = use_deep

        self.first_order = FirstOrder(params)
        self.second_order = SecondOrder(params, get_embeddings=get_embeddings)
        self.mlp = MLP(params,
                       use_batchnorm=use_batchnorm,
                       use_dropout=use_dropout)

        # final concat layer
        if self.use_fm and self.use_deep:
            concat_size = (params['field_size'] + params['embedding_size'] +
                           params['hidden_dims'][-1])
        elif self.use_deep:
            concat_size = params['hidden_dims'][-1]
        elif self.use_fm:
            concat_size = params['field_size'] + params['embedding_size']
        else:
            raise ValueError("at least one of use_fm / use_deep must be True")
        self.concat_layer = nn.Linear(concat_size, 1).to(self.device)
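
# Illustrative only: how concat_size resolves for each flag combination, assuming
# hypothetical values field_size=10, embedding_size=8, hidden_dims=[..., 32].
field_size, embedding_size, last_hidden = 10, 8, 32
assert field_size + embedding_size + last_hidden == 50  # use_fm and use_deep
assert last_hidden == 32                                # use_deep only
assert field_size + embedding_size == 18                # use_fm only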
Example #3
    def __init__(self,
                 observation_space,
                 action_space,
                 use_cuda=False,
                 batch_size=32,
                 gamma=0.9,
                 tau=50,
                 memory_capacity=1000):
        """Initialize model parameters and training progress variables

        Args:
            observation_space (gym.spaces): a spaces object from the gym.spaces module
            action_space (gym.spaces): same as above
            use_cuda (bool): use CUDA tensors and move the networks to GPU if True
            batch_size (int): number of events (transitions) trained in one batch
            gamma (float): discount factor for future rewards
            tau (int): number of optimize() calls between target network syncs
            memory_capacity (int): capacity of the replay memory
        """
        self.Tensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
        self.LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.tau_offset = 0
        self.replay_memory = Memory(memory_capacity)

        self.input_size = self._linear_size(observation_space)
        self.output_size = self._linear_size(action_space)
        self.eval_Q = MLP(self.input_size, self.output_size)  # online network
        self.target_Q = MLP(self.input_size, self.output_size)  # target network
        # sync the target network with the online network
        self.target_Q.load_state_dict(self.eval_Q.state_dict())
        if use_cuda:
            self.eval_Q.cuda()
            self.target_Q.cuda()
        # RMSprop for learning the eval_Q parameters
        self.optimizer = torch.optim.RMSprop(self.eval_Q.parameters())
        self.criterion = nn.MSELoss()  # mean squared error loss
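
# Rough sketch (not from the source) of how input_size / output_size above are
# derived from gym spaces; it mirrors the _linear_size logic shown in Example #8
# and the concrete spaces are only an assumption.
from functools import reduce
from gym import spaces

obs_space = spaces.Box(low=-1.0, high=1.0, shape=(4,))    # e.g. CartPole observations
act_space = spaces.Discrete(2)                            # e.g. two discrete actions

input_size = reduce(lambda x, y: x * y, obs_space.shape)  # Box -> product of shape = 4
output_size = act_space.n                                 # Discrete -> n = 2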
Example #4
    def setup(self):
        """
        Build with no input shape
        """
        # build params
        self.g_theta = MLP(self.n_units, 2, activation="relu")
        self.scale = self.add_weight("scale", shape=(1, ))
        self.scale_shift = self.add_weight("scale_shift", shape=(1, ))
        # input mask
        if self.left_cond:
            self.mask = np.array([1.0, 0.0])
        else:
            self.mask = np.array([0.0, 1.0])
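
# Not from the source: a small NumPy illustration of what the 2-D mask above does.
# In coupling-style layers the masked coordinate passes through unchanged and
# conditions the transform of the other coordinate; the forward pass of this class
# is not shown here, so the mask's exact role is an assumption.
import numpy as np

x = np.array([0.3, -1.2])
mask = np.array([1.0, 0.0])       # the left_cond=True case from setup()
conditioned_on = x * mask         # [0.3, 0.0]  -> what g_theta would see
to_transform = x * (1.0 - mask)   # [0.0, -1.2] -> the coordinate being transformed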
Example #5
    def __init__(self, params, use_batchnorm=True, use_dropout=True):
        super(DIN, self).__init__()

        self.device = params['device']
        self.feature_size = params['feature_size']
        self.embedding_size = params['embedding_size']
        self.userItemDict = params['userItemDict']
        self.hidden_dims = params['hidden_dims']
        self.userItemMaxLen = params['userItemMaxLen']

        feature_embeddings = torch.empty(self.feature_size + 1,
                                         self.embedding_size,
                                         dtype=torch.float32,
                                         device=self.device,
                                         requires_grad=True)
        nn.init.normal_(feature_embeddings)
        self.feature_embeddings = nn.Parameter(feature_embeddings)

        self.mlp = MLP(params,
                       use_batchnorm=use_batchnorm,
                       use_dropout=use_dropout)

        self.output_layer = nn.Linear(self.hidden_dims[-1], 1).to(self.device)
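
# A minimal sketch (not from the source) of the `params` dict DIN.__init__ reads.
# Values are illustrative and the MLP submodule may require additional keys.
din_params_example = {
    'device': 'cpu',
    'feature_size': 10000,    # embedding table gets feature_size + 1 rows
    'embedding_size': 16,
    'hidden_dims': [80, 40],  # hidden_dims[-1] feeds the 1-unit output layer
    'userItemDict': {},       # presumably per-user item histories, used outside this snippet
    'userItemMaxLen': 50,     # presumably the maximum history length
}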
Example #6
        act_list.append(torch.as_tensor(actions))
        reward_list.append(reward)

    state_tens = torch.stack(state_list)
    act_tens = torch.stack(act_list)
    preprocess_sum = torch.as_tensor(sum(reward_list))
    nstate_tens = (state_tens - policy.state_means) / policy.state_std
    reward_list = postprocess(torch.tensor(reward_list), nstate_tens, act_tens)
    reward_sum = torch.as_tensor(sum(reward_list))

    return state_tens, reward_sum, preprocess_sum


if __name__ == "__main__":
    torch.set_default_dtype(torch.float64)
    from seagul.nn import MLP

    env_name = "HalfCheetah-v2"
    env = gym.make(env_name)
    in_size = env.observation_space.shape[0]
    out_size = env.action_space.shape[0]

    policy = MLP(in_size, out_size, 0, 0, bias=False)

    policy, r_hist, lr_hist = ars(env_name,
                                  policy,
                                  20,
                                  n_workers=8,
                                  n_delta=32,
                                  n_top=16)
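
# Illustrative only: the per-dimension state normalization used above, applied to a
# single observation. policy.state_means / policy.state_std are assumed to be
# running statistics maintained elsewhere; zeros/ones stand in for them here.
import torch

obs = torch.randn(17)                         # HalfCheetah-v2 observations are 17-dimensional
state_means = torch.zeros(17)
state_std = torch.ones(17)
normalized = (obs - state_means) / state_std  # same transform as nstate_tens above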
Example #7
    def build(self, input_shape):
        # weight, mean, stddev for each of the k mixture components
        self.dense_nn = MLP(self.n_units, self.k * 3, activation="tanh")
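
# Not from the source: one common way to split a (batch, k * 3) MDN head output into
# mixture weights, means and stddevs. The ordering and activations this layer actually
# uses are not shown above, so treat this purely as an illustration.
import numpy as np

k = 4
raw = np.random.randn(8, k * 3)                     # stand-in for dense_nn's output
logits, means, log_std = np.split(raw, 3, axis=-1)  # three (8, k) blocks
weights = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)  # softmax weights
stddevs = np.exp(log_std)                           # keep the stddevs positive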
Example #8
class DoubleDQN:
    """Double DQN model

    Parameter naming and notation follow the original paper:
    Deep Reinforcement Learning with Double Q-learning (2015)
    https://arxiv.org/abs/1509.06461
    """
    def __init__(self,
                 observation_space,
                 action_space,
                 use_cuda=False,
                 batch_size=32,
                 gamma=0.9,
                 tau=50,
                 memory_capacity=1000):
        """Initialize model parameters and training progress variables

        Args:
            observation_space (gym.spaces): a spaces object from the gym.spaces module
            action_space (gym.spaces): same as above
            use_cuda (bool): use CUDA tensors and move the networks to GPU if True
            batch_size (int): number of events (transitions) trained in one batch
            gamma (float): discount factor for future rewards
            tau (int): number of optimize() calls between target network syncs
            memory_capacity (int): capacity of the replay memory
        """
        self.Tensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
        self.LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.tau_offset = 0
        self.replay_memory = Memory(memory_capacity)

        self.input_size = self._linear_size(observation_space)
        self.output_size = self._linear_size(action_space)
        self.eval_Q = MLP(self.input_size, self.output_size)  # online network
        self.target_Q = MLP(self.input_size, self.output_size)  # target network
        # sync the target network with the online network
        self.target_Q.load_state_dict(self.eval_Q.state_dict())
        if use_cuda:
            self.eval_Q.cuda()
            self.target_Q.cuda()
        # RMSprop for learning the eval_Q parameters
        self.optimizer = torch.optim.RMSprop(self.eval_Q.parameters())
        self.criterion = nn.MSELoss()  # mean squared error loss

    def _linear_size(self, gym_space):
        """Calculate the size of input/output based on descriptive structure (i.e.
        observation_space/action_space) defined by gym.spaces
        """
        res = 0
        if isinstance(gym_space, spaces.Tuple):
            for space in gym_space.spaces:
                res += self._linear_size(space)
            return res
        elif isinstance(gym_space, spaces.MultiBinary) or \
             isinstance(gym_space, spaces.Discrete):
            return gym_space.n
        elif isinstance(gym_space, spaces.Box):
            return reduce(lambda x, y: x * y, gym_space.shape)
        else:
            raise NotImplementedError

    def action(self, obs):
        with torch.no_grad():  # inference only
            obs_var = Variable(self.Tensor(obs))
            _, action = torch.max(self.eval_Q(obs_var), 0)
        return action.item()

    def optimize(self, obs, action, next_obs, reward):
        """Update memory based on given data
        Train the model if memory capacity reach batch size
        """
        self.replay_memory.add_event(
            Memory.Event(obs.copy(), action, next_obs.copy(), reward))
        if self.batch_size <= len(self.replay_memory.mem):
            if self.tau == self.tau_offset:
                self.tau_offset = 0
                self.target_Q.load_state_dict(self.eval_Q.state_dict())
            # sample from replay memory
            mini_batch = self.replay_memory.sample(self.batch_size)
            # transpose the sampled Events into a single Event of batched fields
            mini_batch = Memory.Event(*zip(*mini_batch))

            # calculate the estimated value
            estimated_value = self.eval_Q(
                Variable(self.Tensor(mini_batch.state)))
            # select the value associated with the action taken
            estimated_value = estimated_value.gather(
                1, Variable(self.LongTensor(
                    mini_batch.action).unsqueeze_(1)))  # Q(S_t, A_t; theta_t)

            argmax_action = self.eval_Q(
                Variable(
                    self.Tensor([
                        next_state for next_state in mini_batch.next_state
                        if next_state is not None
                    ])))
            _, argmax_action = torch.max(argmax_action,
                                         1)  # argmax_a Q(S_{t+1}, a; theta_t)

            # calculate target network value
            target_value = self.target_Q(
                Variable(
                    self.Tensor([
                        next_state for next_state in mini_batch.next_state
                        if next_state is not None
                    ])))
            target_value = target_value.gather(
                1, Variable(argmax_action.unsqueeze_(1))
            )  # Q(S_{t+1}, argmax_a Q(S_{t+1}, a; theta_t); theta_t^-)
            target_value *= self.gamma
            target_value += Variable(
                self.Tensor(mini_batch.reward).unsqueeze_(1))  # R_{t+1}

            # compute the loss between estimated value and target value
            self.optimizer.zero_grad()
            loss = self.criterion(estimated_value, target_value.detach())
            loss.backward()  # calculate gradient
            self.optimizer.step()  # apply calculated gradient

            self.tau_offset += 1
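
# A rough usage sketch (not from the source): driving DoubleDQN on a classic gym
# control task. It assumes MLP and Memory are importable from the same module as the
# class and uses the pre-0.26 gym API (reset() -> obs, step() -> obs, reward, done, info).
import gym

env = gym.make("CartPole-v1")
agent = DoubleDQN(env.observation_space, env.action_space,
                  batch_size=32, gamma=0.9, tau=50, memory_capacity=1000)

for episode in range(200):
    obs = env.reset()
    done = False
    while not done:
        action = agent.action(obs)                     # greedy w.r.t. eval_Q; no exploration here
        next_obs, reward, done, _ = env.step(action)
        agent.optimize(obs, action, next_obs, reward)  # store transition, train when enough samples
        obs = next_obs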