def build(self, task: Task, base_dataset: Dataset, epochs=50, components=200,
          gmm_clusters=10, logloss_weight=2.0, lr=5e-3, batch_size=5000):
    """Run the full build pipeline for this model on ``base_dataset``.

    Steps, in order:
      1. Record the call's hyper-parameters on ``task`` (everything in
         ``locals()`` except ``task`` and ``self``, as filtered by
         ``to_clear_ml_params``).
      2. Store ``item_ids`` (``0 .. len(base_dataset.id_to_item) - 1``) and
         ``avg_ratings_per_user`` (``n_ratings / n_users``).
      3. Call ``_build_mf`` only when ``self.user_vectors`` is ``None``.
      4. Call ``_build_torch_model``, ``_train_torch_model`` and
         ``_build_gmm``.

    Args:
        task: Object receiving the hyper-parameters via
            ``set_user_properties``; also forwarded to every build step.
        base_dataset: Dataset providing ``id_to_item``, ``n_ratings`` and
            ``n_users``; forwarded to the MF / torch steps.
        epochs: Forwarded positionally to ``_train_torch_model``.
        components: Forwarded to ``_build_mf`` and ``_build_torch_model``.
        gmm_clusters: Forwarded to ``_build_gmm``.
        logloss_weight: Forwarded to ``_train_torch_model`` by keyword.
        lr: Forwarded to ``_train_torch_model`` by keyword.
        batch_size: Forwarded to ``_train_torch_model`` by keyword.
    """
    # locals() must be evaluated before any other local name is bound,
    # otherwise extra entries would leak into the logged parameters.
    task.set_user_properties(**to_clear_ml_params(locals(), ["task", "self"]))

    item_count = len(base_dataset.id_to_item)
    self.item_ids = list(range(item_count))
    self.avg_ratings_per_user = base_dataset.n_ratings / base_dataset.n_users

    # Existing user vectors are reused; the MF step only runs when none are set.
    vectors_missing = self.user_vectors is None
    if vectors_missing:
        self._build_mf(base_dataset, task, components)

    self._build_torch_model(base_dataset, task, components)

    training_options = {
        "logloss_weight": logloss_weight,
        "lr": lr,
        "batch_size": batch_size,
    }
    self._train_torch_model(base_dataset, task, epochs, **training_options)

    self._build_gmm(task, gmm_clusters)
def build(self, task: Task, base_dataset: Dataset, epochs=50, components=200,
          gmm_clusters=10, logloss_weight=2.0, lr=5e-3, epochs_logloss_only=25):
    """Index the ratings of ``base_dataset`` and run the full build pipeline.

    Steps, in order:
      1. Record the call's hyper-parameters on ``task`` (everything in
         ``locals()`` except ``task`` and ``self``, as filtered by
         ``to_clear_ml_params``).
      2. Convert ``base_dataset.rating_matrix`` to COO form and populate:
         ``_implicit_coef`` (fixed at 1.0), ``avg_rating`` (mean of the
         stored values), ``user_map`` (row -> {col -> value - avg_rating}),
         ``user_map_list`` (row -> list of cols) and ``user_ids``
         (de-duplicated row indices).
      3. Store ``item_ids`` (``0 .. len(base_dataset.id_to_item) - 1``) and
         ``avg_ratings_per_user`` (``n_ratings / n_users``).
      4. Call ``_build_mf`` only when ``self.user_vectors`` is ``None``,
         then ``_build_torch_model``, ``_train_torch_model`` and
         ``_build_gmm``.

    Args:
        task: Object receiving the hyper-parameters via
            ``set_user_properties``; also forwarded to every build step.
        base_dataset: Dataset providing ``rating_matrix``, ``id_to_item``,
            ``n_ratings`` and ``n_users``.
        epochs: Forwarded positionally to ``_train_torch_model``.
        components: Forwarded to ``_build_mf`` and ``_build_torch_model``.
        gmm_clusters: Forwarded to ``_build_gmm``.
        logloss_weight: Forwarded to ``_train_torch_model`` by keyword.
        lr: Forwarded to ``_train_torch_model`` by keyword.
        epochs_logloss_only: Forwarded to ``_train_torch_model`` by keyword.
    """
    # locals() must be evaluated before any other local name is bound,
    # otherwise extra entries would leak into the logged parameters.
    task.set_user_properties(**to_clear_ml_params(locals(), ["task", "self"]))

    ratings = base_dataset.rating_matrix.tocoo()
    self._implicit_coef = 1.0
    self.user_map = {}
    self.user_ids = []
    self.avg_rating = np.mean(ratings.data)

    # One pass over the stored (row, col, value) triples; values are kept
    # relative to the global mean.
    for user, item, rating in zip(ratings.row, ratings.col, ratings.data):
        self.user_ids.append(user)
        self.user_map.setdefault(user, {})[item] = rating - self.avg_rating

    self.user_map_list = {
        user: list(rated) for user, rated in self.user_map.items()
    }
    # Collapse the per-rating list of row indices to unique values.
    self.user_ids = list(set(self.user_ids))

    item_count = len(base_dataset.id_to_item)
    self.item_ids = list(range(item_count))
    self.avg_ratings_per_user = base_dataset.n_ratings / base_dataset.n_users

    # Existing user vectors are reused; the MF step only runs when none are set.
    vectors_missing = self.user_vectors is None
    if vectors_missing:
        self._build_mf(base_dataset, task, components)

    self._build_torch_model(base_dataset, task, components)

    training_options = {
        "logloss_weight": logloss_weight,
        "lr": lr,
        "epochs_logloss_only": epochs_logloss_only,
    }
    self._train_torch_model(base_dataset, task, epochs, **training_options)

    self._build_gmm(task, gmm_clusters)