def recompute_candidateset(cls, user, short_feature, long_feature,
                           id_article_mapping, subset=None):
    """Rescore a user's candidate articles and queue the scores on a pipeline.

    Every id taken from ``subset`` (or from ``user.get_full_dataset()`` when
    ``subset`` is None) that is present in ``id_article_mapping`` contributes
    its article's ``feature_matrix`` to one stacked matrix. That matrix is
    compared against ``short_feature`` and ``long_feature`` with
    ``batch_calculate_similarity``. Each similarity is multiplied by 1000 and
    added to the matching stored score, then handed to ``User.add2pipeline``;
    the pipeline is executed once after all articles are queued.

    NOTE: the short/long feature matrices are supplied by the caller so that
    they do not have to be rebuilt inside this method.

    Args:
        cls: Class object; not read by this method.
        user: Object providing ``get_full_dataset()``; also forwarded to
            ``User.add2pipeline``.
        short_feature: First argument of the short-term similarity call.
        long_feature: First argument of the long-term similarity call.
        id_article_mapping: Mapping from id to an indexable entry whose item 0
            is the article (exposing ``feature_matrix``) and whose item 1 is a
            ``(short_score, long_score)`` pair.
        subset: Optional iterable of ids used instead of the user's full
            dataset.

    Returns:
        None. Returns early, before any pipeline is created, when there are
        no ids or none of them is present in ``id_article_mapping``.
    """
    if subset is not None:
        full_warm_seq_ids = subset
    else:
        full_warm_seq_ids = user.get_full_dataset()
    if not full_warm_seq_ids:
        return

    # Collect the usable entries eagerly and test for emptiness explicitly.
    # Relying on the ValueError raised by unpacking an empty izip() would
    # also hide ValueErrors raised while reading the mapping or an article's
    # feature_matrix.
    usable_articles = []
    usable_features = []
    usable_article_scores = []
    for _id in full_warm_seq_ids:
        if _id not in id_article_mapping:
            continue
        entry = id_article_mapping[_id]
        article = entry[0]
        usable_articles.append(article)
        usable_features.append(article.feature_matrix)
        usable_article_scores.append(entry[1])
    if not usable_articles:
        # No usable id in id_article_mapping.
        return

    # One stacked matrix lets both similarity computations run as a batch.
    article_union_matrix = sp.vstack(usable_features)
    usable_short_urs = batch_calculate_similarity(short_feature,
                                                  article_union_matrix)
    usable_long_urs = batch_calculate_similarity(long_feature,
                                                 article_union_matrix)

    pipeline = warm_conn.pipeline()
    scored = izip(usable_articles, usable_article_scores,
                  usable_short_urs, usable_long_urs)
    for article, article_scores, short_ur, long_ur in scored:
        short_qht_score, long_qht_score = article_scores
        # Similarity is scaled by 1000 before the stored score is added.
        short_score = 1000 * short_ur + short_qht_score
        long_score = 1000 * long_ur + long_qht_score
        User.add2pipeline(pipeline, user, article, short_score, long_score)
    pipeline.execute()
def _generate_user_relation_scores(self, users, feature_name=None):
    """Map each user's ``seq_id`` to its similarity with ``self.feature_matrix``.

    The feature matrix of a user is ``user.feature_matrix`` when
    ``feature_name`` is falsy, otherwise
    ``user.get_feature_by_name(feature_name)``. Users whose matrix is None
    are mapped to 0. All other matrices are stacked with ``sp.vstack`` and
    scored in a single ``batch_calculate_similarity`` call against
    ``self.feature_matrix``.

    Args:
        users: Iterable of objects exposing ``seq_id`` and the feature
            accessors described above.
        feature_name: Optional key selecting a named feature.

    Returns:
        dict mapping ``seq_id`` to the similarity value, or to 0 for users
        without a feature matrix.
    """
    missing = {}
    seq_ids = []
    matrices = []
    for user in users:
        if feature_name:
            matrix = user.get_feature_by_name(feature_name)
        else:
            matrix = user.feature_matrix
        if matrix is None:
            missing[user.seq_id] = 0
            continue
        seq_ids.append(user.seq_id)
        matrices.append(matrix)

    result = {}
    if matrices:
        stacked = sp.vstack(matrices)
        similarities = batch_calculate_similarity(self.feature_matrix, stacked)
        result.update(izip(seq_ids, similarities))
    # Applied last, so a seq_id without a matrix always ends up as 0.
    result.update(missing)
    return result