Exemplo n.º 1
0
class Recommender(object):
    """Combine the results of the individual recommendation algorithms."""

    def __init__(self):
        """Create the database, matching, ranking, rule and view helpers."""
        self.db = MysqlDB()
        self.matching = Matching()
        self.ranking = Ranking()
        self.rule_base = RuleBase()
        self.view_help = ViewFeed()

    def recommend(self, user_id, num=10, tags: list = None):
        """Return at most ``num`` recommendations for ``user_id``.

        ``tags`` is stored on ``g.tags`` before the pipeline runs; the
        result of :meth:`do_recommend` is truncated to ``num`` entries.
        """
        g.tags = tags
        return self.do_recommend(user_id, num)[:num]

    # @show_runtime
    def do_recommend(self, user_id, num=10):
        """Match, rank and record recommendations for ``user_id``.

        Returns whatever ``self.ranking.rank`` produces; the same value is
        passed to ``self.db.insert_history`` before it is returned.
        """
        stored_user = self.db.get_user_info(user_id)
        if stored_user is None:
            # No stored profile: fall back to a user carrying only the id.
            user = User(id=user_id)
        else:
            user = User(**stored_user.to_dict())

        # Coarse candidate selection. Never ask for fewer than 100
        # candidates; the original note attributes this floor to the
        # per-user (user_id) filtering applied to the candidates.
        candidate_count = max(num * 5, 100)
        candidates = self.matching.concurrent_match(user, num=candidate_count)

        # Fine-grained ranking that combines the results of several
        # algorithms.
        ranked = self.ranking.rank(user, num=num, matched_poems=candidates)

        # Record what was recommended as viewing history.
        self.db.insert_history(user_id, ranked)
        # Debug aid, disabled in production:
        # self.view_help.print_result_poems(ranked, num)
        return ranked
Exemplo n.º 2
0
class Preprocessor(object):
    """Load poems and poets from MySQL into the ``Memory`` object.

    ``run`` starts a background thread that repeats the load at a fixed
    interval.
    """

    # Seconds the background task sleeps between two load attempts.
    REFRESH_INTERVAL_SECONDS = 5 * 60

    def __init__(self):
        """Create the memory and MySQL helpers used by the loader."""
        self.memory = Memory()
        self.mysql_db = MysqlDB()

    @synchronized
    def load_all_feeds(self):
        """Load every poem and poet into memory.

        Rebuilds ``all_poems_dict``, ``all_poets_dict``, ``all_poem_ids``
        and ``popular_poem_ids`` on ``self.memory``.

        Raises:
            AssertionError: if the database returns no poems. Nothing on
                ``self.memory`` is modified in that case.
        """
        all_poems = self.mysql_db.get_all_poems()  # 2s
        all_poets = self.mysql_db.get_all_poets()  # 2s
        # Explicit raise instead of ``assert`` so the guard is not stripped
        # under ``python -O`` (which would let an empty result replace the
        # loaded data). The exception type is unchanged.
        if len(all_poems) == 0:
            raise AssertionError('no poems returned by the database')
        self.memory.all_poems_dict = {
            _poem.id: Poem(**_poem.to_dict())
            for _poem in all_poems
        }  # 1.5s
        self.memory.all_poets_dict = {
            _poet.id: Poet(**_poet.to_dict())
            for _poet in all_poets
        }  # 1.5s
        self.memory.all_poem_ids = list(self.memory.all_poems_dict.keys())
        # Must run after all_poems_dict / all_poem_ids are refreshed because
        # get_popular_poem_ids reads both from self.memory.
        self.memory.popular_poem_ids = self.get_popular_poem_ids(1000)
        log_str = '*** load {} feeds to memory'.format(
            len(self.memory.all_poems_dict))
        print(log_str)
        logging.info(log_str)

    def get_popular_poem_ids(self, num):
        """Return poem ids selected by their ``star`` value.

        At most ``num`` ids are requested (fewer when fewer poems are
        loaded). The selection and ordering come from ``largest_indices``,
        which is defined elsewhere -- presumably the indices of the largest
        values, largest first; confirm against that helper.
        """
        poem_ids = self.memory.all_poem_ids
        poems = self.memory.all_poems_dict
        poem_stars = [poems[poem_id].star for poem_id in poem_ids]
        top_indices = largest_indices(np.asarray(poem_stars),
                                      min(num, len(poem_stars)))
        return [poem_ids[index] for index in top_indices]

    @synchronized
    def task(self, app):
        """Push an application context from ``app`` and reload feeds forever.

        A failed load is logged and retried after the next sleep instead of
        terminating the loop, so one transient error does not permanently
        stop the refresh.

        NOTE(review): both this method and ``load_all_feeds`` are wrapped by
        ``synchronized`` (defined elsewhere) -- confirm that decorator
        tolerates the nested call and this never-returning loop.
        """
        app.app_context().push()
        while True:
            try:
                self.load_all_feeds()
            except Exception:
                # Keep the refresh loop alive; the traceback goes to the log.
                logging.exception('*** failed to load feeds to memory')
            time.sleep(self.REFRESH_INTERVAL_SECONDS)

    def run(self, app):
        """Start ``task`` for ``app`` in a new thread."""
        from threading import Thread
        t = Thread(target=self.task, args=[app])
        t.start()
Exemplo n.º 3
0
 def __init__(self):
     """Create the MySQL and memory helpers, then call ``load_all_poems``."""
     self.mysql_db = MysqlDB()
     self.memory = Memory()
     # Runs at construction time, after both helpers exist.
     self.load_all_poems()
Exemplo n.º 4
0
class TagTask(object):
    """Offline tasks: tag poems and generate synthetic rating history."""

    def __init__(self):
        """Create the MySQL and memory helpers, then load poems and poets."""
        self.mysql_db = MysqlDB()
        self.memory = Memory()
        self.load_all_poems()

    def load_all_poems(self):
        """Load every poem and poet from MySQL into ``self.memory``.

        Rebuilds ``all_poems_dict``, ``all_poets_dict`` and ``all_poem_ids``.

        Raises:
            AssertionError: if the database returns no poems.
        """
        all_poems = self.mysql_db.get_all_poems()  # 2s
        all_poets = self.mysql_db.get_all_poets()  # 2s
        # Explicit raise instead of ``assert`` so the guard survives
        # ``python -O``; the exception type is unchanged.
        if len(all_poems) == 0:
            raise AssertionError("no poems returned by the database")
        self.memory.all_poems_dict = {
            _poem.id: Poem(**_poem.to_dict())
            for _poem in all_poems
        }  # 1.5s
        self.memory.all_poets_dict = {
            _poet.id: Poet(**_poet.to_dict())
            for _poet in all_poets
        }  # 1.5s
        self.memory.all_poem_ids = list(self.memory.all_poems_dict.keys())

    def create_fake_history(self):
        """Create a batch of fake rating records and write them as CSV.

        The random generator is seeded, so the output is reproducible for a
        given set of loaded poems. Each record's ``star`` is:

        * the number of the user's attribute/context values that also
          appear in the poem's tags,
        * +5 if the user prefers a genre the poem is tagged with (only the
          first matching genre counts),
        * +1 for an age-related theme match,
        * +1 for a gender-related theme match.

        Writes ``test.txt`` and ``train.txt`` under ``wd_data_dir`` and
        ``history.csv`` in the current working directory.
        """
        random.seed(100)
        user_count = 100
        preference_count = 30
        # Users who prefer each genre. The samples are drawn in the same
        # order as before so the seeded random stream is unchanged; they are
        # stored as sets because they are only used for membership tests.
        tangshi = set(random.sample(range(user_count), preference_count))  # Tang poetry
        songci = set(random.sample(range(user_count), preference_count))  # Song ci
        yuanqu = set(random.sample(range(user_count), preference_count))  # Yuan qu
        shijing = set(random.sample(range(user_count), preference_count))  # Shijing
        chuci = set(random.sample(range(user_count), preference_count))  # Chuci
        # Checked in this order; only the first match earns the bonus.
        genre_fans = (
            ("唐诗", tangshi),
            ("宋词", songci),
            ("元曲", yuanqu),
            ("诗经", shijing),
            ("楚辞", chuci),
        )
        data = {
            "user_id": [],
            "poem_id": [],
            # user info (province / city are reflected in region_tag)
            "age": [],
            "gender": [],
            "device_id": [],
            # user context
            "weather_tag": [],
            "wind_tag": [],
            "temperature_tag": [],
            "time_tag": [],
            "season_tag": [],
            "festival_tag": [],
            "region_tag": [],
            # rating
            "star": [],
        }
        for user_id in range(user_count):
            # Values are evaluated top to bottom, which keeps the order of
            # random draws identical to assigning the keys one by one.
            record = {
                "user_id": user_id,
                "age": random.randint(10, 70),
                "gender": random.choice(['男', '女']),
                "device_id": random.choice(['Android', 'iPhone']),
                "weather_tag": random.choice(weather_tags),
                "wind_tag": random.choice([random.choice(wind_tags), "未知"]),
                "temperature_tag": random.choice(temperature_tags),
                "time_tag": random.choice(time_tags),
                "season_tag": random.choice(season_tags),
                "festival_tag": random.choice(
                    [random.choice(festival_tags), "无"]),
                "region_tag": random.choice(region_tags),
            }
            # Captured before poem_id / star are added to the record.
            user_tags = set(record.values())
            age = record["age"]
            gender = record["gender"]
            # Each user rates between 0 and 100 randomly chosen poems.
            for _ in range(random.randint(0, 100)):
                poem_id = random.choice(self.memory.all_poem_ids)
                poem = self.memory.all_poems_dict[poem_id]
                star = len(user_tags & poem.tags)
                for genre, fans in genre_fans:
                    if user_id in fans and genre in poem.tags:
                        star += 5
                        break
                if age <= 22 and '爱情' in poem.tags:
                    star += 1
                elif age > 40 and {'怀古', '重阳', '抒情', '思念'} & poem.tags:
                    star += 1
                if gender == '男' and {'豪放', '战争', '励志'} & poem.tags:
                    star += 1
                elif gender == '女' and {'婉约', '闺怨', '读书'} & poem.tags:
                    star += 1
                record["poem_id"] = poem_id
                record["star"] = star
                for k, v in record.items():
                    data[k].append(v)
        df_data = pd.DataFrame(data=data)
        split_at = int(0.7 * len(df_data))
        # NOTE(review): the first 70% of rows go to "test.txt" and the
        # remaining 30% to "train.txt"; the names look swapped relative to a
        # usual train/test split. Left unchanged -- confirm with consumers.
        df_data[:split_at].to_csv(
            path_or_buf=os.path.join(wd_data_dir, "test.txt"),
            sep=",",
            index=False)
        df_data[split_at:].to_csv(
            path_or_buf=os.path.join(wd_data_dir, "train.txt"),
            sep=",",
            index=False)
        df_data.to_csv(path_or_buf="history.csv", sep=",", index=False)

    def tag_poems(self):
        """Tag every poem and store the merged tag list back on the row.

        A poem's new tags are its existing tags, plus its poet's city (when
        known), plus every entry of ``all_tags`` found in the poem's
        about / fanyi / shangxi / content text. Empty tags and "无" are
        dropped. Changes are committed every 1000 poems and once at the end.
        """
        poems = self.mysql_db.session.query(Poem).all()
        count = 0
        for count, poem in enumerate(poems, start=1):
            found_tags = []
            poet = self.memory.all_poets_dict.get(poem.poet_id)
            if poet is not None and poet.city:
                found_tags.append(poet.city)
            # Text columns may be NULL; treat a missing part as empty
            # instead of failing on ``None + str``.
            content = "".join(
                part or ""
                for part in (poem.about, poem.fanyi, poem.shangxi,
                             poem.content))
            found_tags.extend(tag for tag in all_tags if tag in content)
            poem_tags = poem.tags.split(",") if poem.tags else []
            merged = {
                _tag
                for _tag in poem_tags + found_tags if _tag not in ("", "无")
            }
            # Sorted so the stored string is deterministic; set iteration
            # order for strings can differ from one run to the next.
            poem.tags = ",".join(sorted(merged))
            if count % 1000 == 0:
                # Commit in batches rather than once per poem.
                self.mysql_db.session.commit()
                print(count)
        self.mysql_db.session.commit()
        print(count)

    def tag_poets(self):
        """Tag poets (not implemented)."""

    def run(self):
        """Run the poem tagging task."""
        self.tag_poems()
Exemplo n.º 5
0
 def __init__(self):
     """Create the memory and MySQL helpers used by this object."""
     self.memory = Memory()
     self.mysql_db = MysqlDB()
Exemplo n.º 6
0
 def __init__(self):
     """Create the database, matching, ranking, rule and view helpers."""
     self.db = MysqlDB()
     self.matching = Matching()
     self.ranking = Ranking()
     self.rule_base = RuleBase()
     self.view_help = ViewFeed()
Exemplo n.º 7
0
 def history(self):
     """Return the history for ``self.id`` as a set.

     The value is fetched through ``MysqlDB().get_user_history`` on the
     first call and kept on the instance for later calls.
     """
     if self.__history is not None:
         return self.__history
     # Not loaded yet: query once and keep the result on the instance.
     self.__history = set(MysqlDB().get_user_history(self.id))
     return self.__history
Exemplo n.º 8
0
 def __init__(self):
     """Create the MySQL helper used by this object."""
     self.mysql_db = MysqlDB()