Example #1
    @classmethod
    def recompute_candidateset(cls, user, short_feature, long_feature, id_article_mapping, subset=None):
        # NOTE: a classmethod, so the article feature matrices are not rebuilt per user
        if subset is not None:
            full_warm_seq_ids = subset
        else:
            full_warm_seq_ids = user.get_full_dataset()
        if not full_warm_seq_ids:
            return

        usable_id_feature_pairs = (
            (id_article_mapping[_id][0],
             id_article_mapping[_id][0].feature_matrix,
             id_article_mapping[_id][1])
            for _id in full_warm_seq_ids if _id in id_article_mapping
        )
        try:
            usable_articles, usable_features, usable_article_scores = izip(*usable_id_feature_pairs)
        except ValueError:
            # NOTE: no usable ids in id_article_mapping, so there is nothing to unpack
            return
        if not usable_articles:
            return
        article_union_matrix = sp.vstack(usable_features)
        usable_short_urs = batch_calculate_similarity(short_feature, article_union_matrix)
        usable_long_urs = batch_calculate_similarity(long_feature, article_union_matrix)
        pipeline = warm_conn.pipeline()
        for article, article_scores, short_ur, long_ur in izip(usable_articles, usable_article_scores, usable_short_urs, usable_long_urs):
            short_qht_score, long_qht_score = article_scores
            # combine the weighted user-relation similarity with the article's precomputed score
            short_score = 1000*short_ur + short_qht_score
            long_score = 1000*long_ur + long_qht_score
            User.add2pipeline(pipeline, user, article, short_score, long_score)
        pipeline.execute()
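The snippet relies on helpers that are not shown here (`batch_calculate_similarity`, `User.add2pipeline`, the `warm_conn` Redis connection). As a rough illustration, `batch_calculate_similarity` presumably scores one user feature vector against every row of the stacked article matrix; a minimal sketch assuming sparse feature rows and cosine similarity (the actual metric is not shown in the source):

    # Hypothetical sketch; the real helper implementation is not part of these examples.
    from sklearn.metrics.pairwise import cosine_similarity

    def batch_calculate_similarity(user_feature, article_union_matrix):
        # user_feature: a 1 x D (sparse) feature row for one user
        # article_union_matrix: an N x D matrix produced by sp.vstack above
        # Returns N similarity scores, one per article row, in stacking order.
        return cosine_similarity(user_feature, article_union_matrix)[0]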
Example #2
    def remove_articles_from_candidateset(self, articles):
        pipeline = warm_conn.pipeline()
        for article in articles:
            keys = self.get_category_keys(article.category)
            for key in keys:
                pipeline.zrem(key, article.seq_id)
        pipeline.execute()
Example #3
    def get_all_key_ids_iteritems(self):
        dataset_keys = self.get_all_dataset_keys()
        pipeline = warm_conn.pipeline()
        for key in dataset_keys:
            pipeline.zrange(key, 0, -1)
        ids_list = pipeline.execute()
        return izip(dataset_keys, ids_list)
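These snippets target Python 2: `izip` comes from `itertools` and is assumed to be imported at module level. On Python 3 the built-in `zip` is already lazy, so the same method would read as follows (a sketch, keeping the same assumed `warm_conn` connection):

    def get_all_key_ids_iteritems(self):
        # Python 3 variant of the method above: zip replaces itertools.izip directly.
        dataset_keys = self.get_all_dataset_keys()
        pipeline = warm_conn.pipeline()
        for key in dataset_keys:
            pipeline.zrange(key, 0, -1)
        ids_list = pipeline.execute()
        return zip(dataset_keys, ids_list)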
Example #4
    @classmethod
    def recompute_common_candidateset(cls, common_candidate_ids, id_article_mapping):
        from people.mixins import default_key_user
        pipeline = warm_conn.pipeline()
        for _id in common_candidate_ids:
            if _id not in id_article_mapping:
                continue
            article, scores = id_article_mapping[_id]
            short_score, long_score = scores
            User.add2pipeline(pipeline, default_key_user, article, short_score, long_score)
        pipeline.execute()
Example #5
    def copy_warm_data(self, uk_obj):
        pipeline = warm_conn.pipeline()
        dataset_keys = uk_obj.get_all_dataset_keys()
        for key in dataset_keys:
            pipeline.zrange(key, 0, -1, withscores=True)
        mapping_list = pipeline.execute()
        new_dataset_keys = self.get_all_dataset_keys()
        for key, mapping in izip(new_dataset_keys, mapping_list):
            if not mapping:
                continue
            # mapping is a list of (member, score) pairs; the **kwargs form of
            # zadd is the redis-py 2.x signature
            pipeline.zadd(key, **dict(mapping))
        pipeline.execute()
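`pipeline.zadd(key, **dict(mapping))` here, and the `zadd(key, member, score)` calls in the other examples, appear to follow the legacy redis-py 2.x `Redis` client, which accepted member/score positional pairs and keyword mappings. redis-py 3.0 replaced both forms with a single `{member: score}` dict argument, so under a modern client the copy loop would look roughly like this (a sketch, assuming redis-py >= 3.0):

    # Sketch of the second loop under redis-py >= 3.0.
    for key, mapping in zip(new_dataset_keys, mapping_list):
        if not mapping:
            continue
        pipeline.zadd(key, dict(mapping))  # mapping is a list of (member, score) pairs
    pipeline.execute()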
Example #6
    def batch_add_to_users_candidateset(self):
        if not self.usable:
            return
        from people.models import User
        from people.mixins import default_key_user

        users = User.objects.all().only('id', 'seq_id', 'feature', 'recent_feature').order_by('id')
        short_ur_mapping = self._generate_user_relation_scores(users, 'recent_feature')
        long_ur_mapping = self._generate_user_relation_scores(users)
        if len(short_ur_mapping) != len(long_ur_mapping):
            # NOTE: this mismatch should be logged
            raise Exception('short/long user-relation score mappings differ in length')
        update_time = get_global_cal_time()
        default_short_score, default_long_score = calculate_scores(0, 0, self.published_at, self.quality, self.hot, update_time)
        seq_id = str(self.seq_id)

        pipeline = warm_conn.pipeline()
        keys = default_key_user.get_category_keys(self.category)

        # seed the shared default-user keys with the baseline scores
        long_key = default_key_user.rec_longterm_dataset_key
        pipeline.zadd(long_key, seq_id, default_long_score)
        long_cate_recs = default_key_user.get_longterm_dataset_key(self.category)
        for long_cate_rec in long_cate_recs:
            pipeline.zadd(long_cate_rec, seq_id, default_long_score)

        short_key = default_key_user.rec_shortterm_dataset_key
        pipeline.zadd(short_key, seq_id, default_short_score)

        short_cate_recs = default_key_user.get_shortterm_dataset_key(self.category)
        for short_cate_rec in short_cate_recs:
            pipeline.zadd(short_cate_rec, seq_id, default_short_score)

        # per-user entries: combine each user's relation scores with the baseline scores
        for u in users:
            short_ur_score = short_ur_mapping[u.seq_id]
            long_ur_score = long_ur_mapping[u.seq_id]
            short_score = 1000*short_ur_score + default_short_score
            long_score = 1000*long_ur_score + default_long_score
            long_key = u.rec_longterm_dataset_key
            pipeline.zadd(long_key, seq_id, long_score)
            long_cate_recs = u.get_longterm_dataset_key(self.category)
            for long_cate_rec in long_cate_recs:
                pipeline.zadd(long_cate_rec, seq_id, long_score)
            short_key = u.rec_shortterm_dataset_key
            pipeline.zadd(short_key, seq_id, short_score)
            short_cate_recs = u.get_shortterm_dataset_key(self.category)
            for short_cate_rec in short_cate_recs:
                pipeline.zadd(short_cate_rec, seq_id, short_score)
        pipeline.execute()
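Examples #1 and #4 call `User.add2pipeline`, which is not included in these examples; the per-user block in the loop above looks like an inlined version of it. A hypothetical reconstruction, using only the key accessors that appear in this example:

    @staticmethod
    def add2pipeline(pipeline, user, article, short_score, long_score):
        # Hypothetical sketch inferred from the inline per-user block above;
        # the real helper is not shown in the source.
        seq_id = str(article.seq_id)
        pipeline.zadd(user.rec_longterm_dataset_key, seq_id, long_score)
        for key in user.get_longterm_dataset_key(article.category):
            pipeline.zadd(key, seq_id, long_score)
        pipeline.zadd(user.rec_shortterm_dataset_key, seq_id, short_score)
        for key in user.get_shortterm_dataset_key(article.category):
            pipeline.zadd(key, seq_id, short_score)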
Example #7
    def clear_dataset(self, earliest_article, useless_ids):
        '''
        Clear outdated candidate data: drop everything older than
        earliest_article, or the explicit useless_ids when no cutoff is given.
        '''
        if not earliest_article and not useless_ids:
            return
        if earliest_article:
            filter_func = lambda x: int(x) < earliest_article.seq_id
        else:
            filter_func = lambda x: int(x) in useless_ids
        key_ids_pairs = self.get_all_key_ids_iteritems()
        pipeline = warm_conn.pipeline()
        for key, ids in key_ids_pairs:
            if ids:
                ids_to_del = [int(_id) for _id in ids if filter_func(_id)]
                if ids_to_del:
                    pipeline.zrem(key, *ids_to_del)
        pipeline.execute()
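For context, a hypothetical call site for `clear_dataset`; `cutoff_article` and the id set are illustrative placeholders, not names from the source:

    # Illustrative only: prune by age when a cutoff article is known,
    # otherwise drop an explicit set of ids.
    user.clear_dataset(cutoff_article, None)
    user.clear_dataset(None, {1024, 2048, 4096})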
Example #8
    def remove_from_candidateset(self, user):
        keys = user.get_category_keys(self.category)
        pipeline = warm_conn.pipeline()
        for key in keys:
            pipeline.zrem(key, self.seq_id)
        pipeline.execute()