def save_to_es(self):
    """Copy this item's fields onto a new ``ArticleType`` and save it.

    ``self`` is used as a mapping: values are read with ``self[key]`` and
    optional keys are probed with ``key in self``. After the document is
    saved, the ``"cnblogs_count"`` counter is incremented through
    ``redis_cli``.

    Lookups of the required keys are not guarded, so a missing key
    propagates whatever error the item lookup raises.

    Returns:
        None.
    """
    doc = ArticleType()

    # Required fields: attribute name and item key are the same.
    for field in ("title", "create_date", "content"):
        setattr(doc, field, self[field])

    # Image fields are optional; copy each one only when the item has it.
    for field in ("front_image_url", "front_image_path"):
        if field in self:
            setattr(doc, field, self[field])

    for field in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
        setattr(doc, field, self[field])

    # NOTE(review): this sets a plain ``id`` attribute. If ArticleType is an
    # elasticsearch-dsl document and the intent is to control the document
    # id, that is normally done through ``meta.id`` -- confirm.
    doc.id = self["url_object_id"]

    # Generate the search suggestions from (text, weight) pairs:
    # the title is passed with weight 10 and the tags with weight 7.
    index_name = ArticleType._doc_type.index
    weighted_sources = ((doc.title, 10), (doc.tags, 7))
    doc.suggest = gen_suggests(index_name, weighted_sources)

    doc.save()

    # Increment the stored-item counter by one.
    redis_cli.incr("cnblogs_count")
def process_item(self, item, spider):
    """Build an ``ArticleType`` from ``item``, save it, and return ``item``.

    Args:
        item: Mapping-style object read with ``item[key]``. All keys used
            below are looked up unconditionally.
        spider: Not used by this method.

    Returns:
        The same ``item`` object that was passed in.
    """
    doc = ArticleType()
    doc.title = item["title"]
    doc.create_date = item["create_date"]

    # Strip markup, trim surrounding whitespace, then drop CRLF pairs and
    # tab characters (in that order).
    plain_text = remove_tags(item["content"]).strip()
    for unwanted in ("\r\n", "\t"):
        plain_text = plain_text.replace(unwanted, "")
    doc.content = plain_text

    # Pass-through fields: attribute name and item key are the same.
    # ``front_image_path`` is not copied onto the document here.
    for field in (
        "front_image_url",
        "praise_nums",
        "comment_nums",
        "fav_nums",
        "url",
        "tags",
    ):
        setattr(doc, field, item[field])

    doc.id = item["url_object_id"]

    # Suggestions are derived from the values already stored on the document.
    doc.title_suggest = self.gen_suggests(doc.title, doc.tags)

    doc.save()

    # Increment the stored-item counter by one.
    redis_cli.incr("jobbole_count")
    return item