def closest_cluster(self, x):
    """Return the cluster model whose projected center is most similar to x."""
    sims = []
    for m in self.models:
        z = project(m['V'], x)  # project x into this cluster's embedding space
        sim = cosine_similarity([z], [m['center_z']])
        sims.append(sim)
    return self.models[np.argmax(sims)]
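
# `project` is not defined in this section. A minimal sketch, assuming it maps
# a single instance's feature vector into the embedding space via the learned
# regressor V (shape: embedding dim x feature dim); the sparse-input handling
# is an assumption, not the original implementation:
import numpy as np
from scipy import sparse

def project(V, x):
    # flatten a sparse or dense row vector to 1-d before projecting
    if sparse.issparse(x):
        x = x.toarray()
    return V @ np.asarray(x).ravel()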
def predict(self, x):
    model = self.closest_cluster(x)
    z = project(model['V'], x)
    dist, nbrs = model['Z_neighbors'].kneighbors([z], return_distance=True)
    # map neighbour positions back to indices into the full training set
    real_idx = [model['data_idx'][i] for i in nbrs[0]]
    # weight each neighbour's labels by 1 / distance
    # (guard against zero distance when an exact neighbour is found)
    weights = (1 / np.maximum(dist, 1e-12)).T
    labels = np.asarray(self.train_Y[real_idx, :].todense())
    scores_per_instance = labels * weights
    scores = scores_per_instance.sum(axis=0)
    return np.array(scores).flatten()
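
# The `Model` and `Ensemble` wrappers referenced in main() are not shown in
# this section. A minimal sketch consistent with how they are called;
# averaging scores across learners is an assumption:
class Model:
    def __init__(self, models, train_Y):
        self.models = models      # one dict per cluster
        self.train_Y = train_Y    # sparse label matrix of the training set

    # closest_cluster and predict (defined above) are methods of this class

class Ensemble:
    def __init__(self, models):
        self.models = models      # one Model per learner

    def predict_many(self, X):
        # average the per-learner score vectors for each test instance
        X = X.toarray() if sparse.issparse(X) else np.asarray(X)
        return np.vstack([
            np.mean([m.predict(x) for m in self.models], axis=0)
            for x in X
        ])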
def main():
    # parameters from the paper
    Params = namedtuple('Params', [
        'num_learners', 'num_clusters', 'num_threads', 'SVP_neigh',
        'out_dim', 'w_thresh', 'sp_thresh', 'NNtest', 'normalize',
        'regressor_lambda1', 'regressor_lambda2', 'embedding_lambda'
    ])
    params = Params(
        num_learners=1,
        num_clusters=1,
        num_threads=32,
        SVP_neigh=250,
        out_dim=100,
        w_thresh=0.01,   # ?
        sp_thresh=0.01,  # ?
        NNtest=25,
        normalize=1,     # ?
        regressor_lambda1=1e-6,
        regressor_lambda2=1e-3,
        embedding_lambda=0.1,  # determined automatically in WAltMin_asymm.m
    )

    train_X, train_Y, test_X, test_Y = load_input()

    # one k-means clustering per learner
    clusterings = []
    for i in range(params.num_learners):
        model = KMeans(n_clusters=params.num_clusters, n_init=8, max_iter=100)
        model.fit(train_X)
        clusterings.append(model)

    learners = []
    for clus_model in tqdm(clusterings):
        models = []
        for i in range(clus_model.n_clusters):
            # for each cluster in each learner, learn a local model
            data_idx = np.nonzero(clus_model.labels_ == i)[0]
            X = train_X[data_idx, :]
            Y = train_Y[data_idx, :]

            print('embedding learning: building kNN graph')
            # build the kNN graph over the label vectors
            graph = kneighbors_graph(Y, params.SVP_neigh, mode='distance',
                                     metric='cosine', include_self=True,
                                     n_jobs=-1)
            graph.data = 1 - graph.data  # convert distance to similarity

            print('embedding learning: ALS')
            # learn the local embedding by matrix factorization
            als_model = implicit.als.AlternatingLeastSquares(
                factors=params.out_dim,
                regularization=params.embedding_lambda)
            als_model.fit(graph)

            # the embedding, shape: #instances x embedding dim
            Z = als_model.item_factors

            print('linear regressor training')
            # learn the linear regressor mapping features to embeddings
            if True:
                # regressor = Ridge(fit_intercept=True, alpha=params.regressor_lambda2)
                regressor = ElasticNet(alpha=0.1, l1_ratio=0.001)
                regressor.fit(X, Z)
                # shape: embedding dim x feature dim
                V = regressor.coef_
            else:
                # learn V with l2 on V and l1 on VX; note that X is sparse
                V = learn_V(X.toarray(), Z,
                            lambda1=params.regressor_lambda1,
                            lambda2=params.regressor_lambda2,
                            iter_max=200, print_log=True)

            # the nearest-neighbour model over the fitted embeddings
            fitted_Z = X.toarray() @ V.T
            Z_neighbors = NearestNeighbors(n_neighbors=params.NNtest,
                                           algorithm='kd_tree').fit(fitted_Z)

            projected_center = project(V, clus_model.cluster_centers_[i])
            learned = {
                'center_z': projected_center,
                'V': V,
                'Z_neighbors': Z_neighbors,
                'data_idx': data_idx,
            }
            models.append(learned)
        learners.append(models)

    models = [Model(learner, train_Y) for learner in learners]
    ensemble = Ensemble(models)

    # predict
    pred_Y = ensemble.predict_many(test_X)

    # evaluation: precision@k
    performance = precision_at_ks(test_Y, pred_Y)
    for k, s in performance.items():
        print('precision@{}: {:.4f}'.format(k, s))
    # LRAP
    print(label_ranking_average_precision_score(test_Y.toarray(), pred_Y))
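
# `precision_at_ks` is not defined in this section. A minimal sketch, assuming
# it returns {k: mean precision@k} over the test instances; the cutoffs
# (1, 3, 5) are an assumption:
def precision_at_ks(true_Y, pred_Y, ks=(1, 3, 5)):
    true = np.asarray(true_Y.todense()) if sparse.issparse(true_Y) else np.asarray(true_Y)
    pred = np.asarray(pred_Y)
    result = {}
    for k in ks:
        # indices of the k highest-scoring labels per instance
        top_k = np.argsort(-pred, axis=1)[:, :k]
        # fraction of the top k that are true labels, averaged over instances
        hits = np.take_along_axis(true, top_k, axis=1)
        result[k] = hits.sum(axis=1).mean() / k
    return result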