def __init__(self,
             data_dir: Text,
             rank_k: int,
             batch_size: int = 1,
             num_movies: int = 20):
    """Sets up the MovieLens bandit environment from a rank-k factorization.

    The rating matrix is loaded from `data_dir`, cut down to its first
    `num_movies` columns (movies) and to the rows (users) whose remaining
    ratings sum to a positive value. A truncated SVD of that matrix yields the
    user factors, the movie factors and the approximate ratings matrix, from
    which the per-user optimal action and optimal reward are precomputed.

    Args:
      data_dir: (string) Directory holding the data (in text form).
      rank_k: (int) Rank of the matrix factorization. It is also used as the
        dimension of the observation vectors.
      batch_size: (int) Number of observations generated per call.
      num_movies: (int) Number of leading movies kept from the data; every
        other movie is discarded. This is also the number of actions.
    """
    self._num_actions = num_movies
    self._batch_size = batch_size
    self._context_dim = rank_k

    # Load the ratings and restrict them to the leading `num_movies` columns.
    ratings = dataset_utilities.load_movielens_data(data_dir)
    ratings = ratings[:, :num_movies]

    # Drop every user whose remaining ratings do not sum to a positive value.
    has_ratings = np.sum(ratings, axis=1) > 0.0
    kept_users = list(np.nonzero(has_ratings)[0])
    self._data_matrix = ratings[kept_users, :]
    self._effective_num_users = len(kept_users)

    # Factorize, then retain the first `rank_k` singular triplets. The square
    # root of each singular value is folded into both factors so that their
    # product reproduces the rank-k approximation.
    left, singular_values, right_t = np.linalg.svd(
        self._data_matrix, full_matrices=False)
    root_s = np.sqrt(singular_values[:rank_k])
    self._u_hat = left[:, :rank_k] * root_s
    self._v_hat = (right_t[:rank_k, :].T * root_s).T
    self._approx_ratings_matrix = np.matmul(self._u_hat, self._v_hat)

    # Best action and its reward for every retained user.
    self._optimal_action_table = self._approx_ratings_matrix.argmax(axis=1)
    self._optimal_reward_table = self._approx_ratings_matrix.max(axis=1)

    # Zero-filled placeholders for the per-batch user bookkeeping and the
    # observation buffer.
    self._current_users = np.zeros(batch_size)
    self._previous_users = np.zeros(batch_size)
    self._observation = np.zeros((batch_size, rank_k))

    self._action_spec = array_spec.BoundedArraySpec(
        shape=(),
        dtype=np.int32,
        minimum=0,
        maximum=num_movies - 1,
        name='action')
    observation_spec = array_spec.ArraySpec(
        shape=(rank_k,), dtype=np.float64, name='observation')
    self._time_step_spec = ts.time_step_spec(observation_spec)

    super(MovieLensPyEnvironment, self).__init__(
        observation_spec, self._action_spec)
def __init__(self,
             data_dir: Text,
             rank_k: int,
             batch_size: int = 1,
             num_actions: int = 50):
    """Sets up the per-arm MovieLens bandit environment.

    The full rating matrix loaded from `data_dir` is factorized with an SVD
    and truncated to `rank_k` components, stored as float32. Observations
    consist of a global feature vector and one feature vector per arm, each
    of length `rank_k`.

    Args:
      data_dir: (string) Directory holding the data (in text form).
      rank_k: (int) Rank of the matrix factorization. It is also the feature
        dimension of both the user features and the movie features.
      batch_size: (int) Number of observations generated per call.
      num_actions: (int) Number of movies to choose from in each round.
    """
    self._batch_size = batch_size
    self._context_dim = rank_k
    self._num_actions = num_actions

    # Load the ratings; the two axes of the matrix are users and movies.
    self._data_matrix = dataset_utilities.load_movielens_data(data_dir)
    self._num_users, self._num_movies = self._data_matrix.shape

    # Factorize and retain the first `rank_k` singular triplets as float32.
    left, singular_values, right_t = np.linalg.svd(
        self._data_matrix, full_matrices=False)
    self._u_hat = left[:, :rank_k].astype(np.float32)
    self._s_hat = singular_values[:rank_k].astype(np.float32)
    self._v_hat = right_t[:rank_k].T.astype(np.float32)

    # Rank-k reconstruction of the ratings: (U * s) V^T.
    scaled_users = self._u_hat * self._s_hat
    self._approx_ratings_matrix = np.matmul(scaled_users, self._v_hat.T)

    self._action_spec = array_spec.BoundedArraySpec(
        shape=(),
        dtype=np.int32,
        minimum=0,
        maximum=num_actions - 1,
        name='action')
    global_spec = array_spec.ArraySpec(shape=[rank_k], dtype=np.float32)
    per_arm_spec = array_spec.ArraySpec(
        shape=[num_actions, rank_k], dtype=np.float32)
    observation_spec = {
        GLOBAL_KEY: global_spec,
        PER_ARM_KEY: per_arm_spec,
    }
    self._time_step_spec = ts.time_step_spec(observation_spec)

    # Zero-filled index buffers for the users and movies of the current and
    # the previous batch.
    movie_index_shape = (batch_size, num_actions)
    self._current_user_indices = np.zeros(batch_size, dtype=np.int32)
    self._previous_user_indices = np.zeros(batch_size, dtype=np.int32)
    self._current_movie_indices = np.zeros(movie_index_shape, dtype=np.int32)
    self._previous_movie_indices = np.zeros(movie_index_shape, dtype=np.int32)

    # Zero-filled observation placeholder with the same keys as the spec.
    self._observation = {
        GLOBAL_KEY: np.zeros((batch_size, rank_k)),
        PER_ARM_KEY: np.zeros((batch_size, num_actions, rank_k)),
    }

    super(MovieLensPerArmPyEnvironment, self).__init__(
        observation_spec, self._action_spec)