def __init__(self,
                 data_dir: Text,
                 rank_k: int,
                 batch_size: int = 1,
                 num_movies: int = 20):
        """Initializes the MovieLens Bandit environment.

        The ratings matrix is loaded from `data_dir`, truncated to its first
        `num_movies` columns, stripped of the users with no item rated, and
        then approximated by a rank-`rank_k` factorization obtained from its
        SVD. The rows of the user factor have length `rank_k`, which is the
        shape declared by the observation spec; the actions are the movies.

        Args:
          data_dir: (string) Directory where the data lies (in text form).
          rank_k: (int) Which rank to use in the matrix factorization. Also
            used as the context (observation) dimension.
          batch_size: (int) Number of observations generated per call.
          num_movies: (int) Only the first `num_movies` movies will be used by
            the environment. The rest is cut out from the data.

        Raises:
          ValueError: If `rank_k` is not between 1 and the number of singular
            values of the filtered ratings matrix. Outside that range the
            factors would not have `rank_k` columns and would disagree with
            the observation spec.
        """
        self._num_actions = num_movies
        self._batch_size = batch_size
        self._context_dim = rank_k

        # Load the ratings and keep only the first `num_movies` items.
        ratings = dataset_utilities.load_movielens_data(data_dir)
        ratings = ratings[:, :num_movies]
        # Filter the users with no item rated (rows whose ratings do not sum
        # to a positive value).
        nonzero_users = np.nonzero(np.sum(ratings, axis=1) > 0.0)[0]
        self._data_matrix = ratings[nonzero_users, :]
        self._effective_num_users = len(nonzero_users)

        # A reduced SVD yields min(rows, cols) singular values; asking for
        # more (or for none) cannot produce factors of width `rank_k`.
        max_rank = min(self._data_matrix.shape)
        if not 0 < rank_k <= max_rank:
            raise ValueError(
                '`rank_k` must be in [1, {}] for a ratings matrix of shape '
                '{}, but got {}.'.format(max_rank, self._data_matrix.shape,
                                         rank_k))

        # Compute the SVD.
        u, s, vh = np.linalg.svd(self._data_matrix, full_matrices=False)

        # Keep only the largest singular values, splitting each of them
        # evenly (as a square root) between the user and the movie factors.
        sqrt_s = np.sqrt(s[:rank_k])
        self._u_hat = u[:, :rank_k] * sqrt_s
        self._v_hat = np.transpose(np.transpose(vh[:rank_k, :]) * sqrt_s)
        self._approx_ratings_matrix = np.matmul(self._u_hat, self._v_hat)

        # Placeholders for the per-batch user indices. They are created with
        # an integer dtype so that they are valid array indices.
        self._current_users = np.zeros(batch_size, dtype=np.int32)
        self._previous_users = np.zeros(batch_size, dtype=np.int32)

        self._action_spec = array_spec.BoundedArraySpec(
            shape=(),
            dtype=np.int32,
            minimum=0,
            maximum=self._num_actions - 1,
            name='action')
        observation_spec = array_spec.ArraySpec(shape=(self._context_dim, ),
                                                dtype=np.float64,
                                                name='observation')
        self._time_step_spec = ts.time_step_spec(observation_spec)
        self._observation = np.zeros((self._batch_size, self._context_dim))

        # Per-user best movie and its approximate rating.
        self._optimal_action_table = np.argmax(self._approx_ratings_matrix,
                                               axis=1)
        self._optimal_reward_table = np.max(self._approx_ratings_matrix,
                                            axis=1)

        super(MovieLensPyEnvironment, self).__init__(observation_spec,
                                                     self._action_spec)
示例#2
0
    def __init__(self,
                 data_dir: Text,
                 rank_k: int,
                 batch_size: int = 1,
                 num_actions: int = 50):
        """Initializes the Per-arm MovieLens Bandit environment.

        The full ratings matrix is loaded from `data_dir` and factorized with
        an SVD truncated to `rank_k` components. The truncated factors are
        stored as float32, matching the dtype of the observation spec.

        Args:
          data_dir: (string) Directory where the data lies (in text form).
          rank_k: (int) Which rank to use in the matrix factorization. This
            will also be the feature dimension of both the user and the movie
            features.
          batch_size: (int) Number of observations generated per call.
          num_actions: (int) How many movies to choose from per round.

        Raises:
          ValueError: If `rank_k` is not between 1 and the number of singular
            values of the ratings matrix. Outside that range the factors
            would not have `rank_k` columns and would disagree with the
            observation spec.
        """
        self._batch_size = batch_size
        self._context_dim = rank_k
        self._num_actions = num_actions

        # Load the ratings matrix.
        self._data_matrix = dataset_utilities.load_movielens_data(data_dir)
        self._num_users, self._num_movies = self._data_matrix.shape

        # A reduced SVD yields min(rows, cols) singular values; asking for
        # more (or for none) cannot produce factors of width `rank_k`.
        max_rank = min(self._num_users, self._num_movies)
        if not 0 < rank_k <= max_rank:
            raise ValueError(
                '`rank_k` must be in [1, {}] for a ratings matrix of shape '
                '{}, but got {}.'.format(max_rank, self._data_matrix.shape,
                                         rank_k))

        # Compute the SVD.
        u, s, vh = np.linalg.svd(self._data_matrix, full_matrices=False)

        # Keep only the largest singular values.
        self._u_hat = u[:, :rank_k].astype(np.float32)
        self._s_hat = s[:rank_k].astype(np.float32)
        self._v_hat = np.transpose(vh[:rank_k]).astype(np.float32)

        # Rank-`rank_k` reconstruction of the ratings: (U * s) @ V^T.
        self._approx_ratings_matrix = np.matmul(self._u_hat * self._s_hat,
                                                np.transpose(self._v_hat))

        self._action_spec = array_spec.BoundedArraySpec(shape=(),
                                                        dtype=np.int32,
                                                        minimum=0,
                                                        maximum=num_actions -
                                                        1,
                                                        name='action')
        observation_spec = {
            GLOBAL_KEY:
            array_spec.ArraySpec(shape=[rank_k], dtype=np.float32),
            PER_ARM_KEY:
            array_spec.ArraySpec(shape=[num_actions, rank_k],
                                 dtype=np.float32),
        }
        self._time_step_spec = ts.time_step_spec(observation_spec)

        # Placeholders for the per-batch user indices.
        self._current_user_indices = np.zeros(batch_size, dtype=np.int32)
        self._previous_user_indices = np.zeros(batch_size, dtype=np.int32)

        # Placeholders for the per-batch, per-arm movie indices.
        self._current_movie_indices = np.zeros([batch_size, num_actions],
                                               dtype=np.int32)
        self._previous_movie_indices = np.zeros([batch_size, num_actions],
                                                dtype=np.int32)

        # Placeholder observation, created as float32 so that it agrees with
        # the dtype declared in `observation_spec`.
        self._observation = {
            GLOBAL_KEY:
            np.zeros([batch_size, rank_k], dtype=np.float32),
            PER_ARM_KEY:
            np.zeros([batch_size, num_actions, rank_k], dtype=np.float32),
        }

        super(MovieLensPerArmPyEnvironment,
              self).__init__(observation_spec, self._action_spec)