Exemplo n.º 1
0
    def save_to_es(self):
        """Build an ``ArticleType`` document from this item and save it.

        Required keys are copied one-to-one onto the document, while the two
        front-image keys are copied only when they are present in the item.
        Suggestion data produced by ``gen_suggests`` is attached before the
        document is saved, and afterwards the redis counter
        ``cnblogs_count`` is incremented by one.

        Returns:
            None.
        """
        doc = ArticleType()

        # Keys that are always read from the item, in lookup order.
        for key in ("title", "create_date", "content"):
            setattr(doc, key, self[key])

        # Image keys are optional and skipped when the item lacks them.
        for key in ("front_image_url", "front_image_path"):
            if key in self:
                setattr(doc, key, self[key])

        for key in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
            setattr(doc, key, self[key])
        # NOTE(review): this sets a plain ``id`` attribute; if the intent is
        # to control the stored document id, confirm whether ``meta.id``
        # should be used instead.
        doc.id = self["url_object_id"]

        # Generate search-suggestion terms; the second element of each pair
        # is passed through to gen_suggests (presumably a weight -- confirm).
        index_name = ArticleType._doc_type.index
        weighted_fields = ((doc.title, 10), (doc.tags, 7))
        doc.suggest = gen_suggests(index_name, weighted_fields)

        doc.save()
        # Bump the running count of saved documents by one.
        redis_cli.incr("cnblogs_count")
Exemplo n.º 2
0
    def process_item(self, item, spider):
        """Copy ``item`` into a new ``ArticleType`` document and save it.

        The content is passed through ``remove_tags``, stripped of
        surrounding whitespace, and has every ``"\\r\\n"`` pair and tab
        removed. Suggestion data from ``self.gen_suggests`` is stored on
        ``title_suggest``. After saving, the redis counter
        ``jobbole_count`` is incremented by one.

        Args:
            item: Mapping-like object providing the article keys read below.
            spider: Not used by this method.

        Returns:
            The same ``item`` that was passed in.
        """
        doc = ArticleType()

        for key in ("title", "create_date"):
            setattr(doc, key, item[key])

        # Drop markup first, then trim and remove CRLF pairs and tabs.
        plain_text = remove_tags(item["content"]).strip()
        doc.content = plain_text.replace("\r\n", "").replace("\t", "")

        # front_image_path is not copied onto the document here.
        for key in ("front_image_url", "praise_nums", "comment_nums",
                    "fav_nums", "url", "tags"):
            setattr(doc, key, item[key])
        # NOTE(review): this sets a plain ``id`` attribute; if the intent is
        # to control the stored document id, confirm whether ``meta.id``
        # should be used instead.
        doc.id = item["url_object_id"]

        doc.title_suggest = self.gen_suggests(doc.title, doc.tags)

        doc.save()
        redis_cli.incr("jobbole_count")
        return item