def save_to_es(self):
    """Index this item into Elasticsearch as an ``ArticleType`` document.

    Copies the item's fields onto a new ``ArticleType``, builds the
    search-suggestion data from the title (weight 10) and the tags
    (weight 7), saves the document and then increments the
    ``cnblogs_count`` counter in Redis.

    ``front_image_url`` and ``front_image_path`` are copied only when the
    item contains them. Every other field is read with ``self[...]``
    unguarded, so a missing one propagates the lookup error.
    """
    article = ArticleType()
    article.title = self["title"]
    article.create_date = self["create_date"]
    article.content = self["content"]

    # Both image fields may be absent from the item.
    for optional_field in ("front_image_url", "front_image_path"):
        if optional_field in self:
            setattr(article, optional_field, self[optional_field])

    article.praise_nums = self["praise_nums"]
    article.fav_nums = self["fav_nums"]
    article.comment_nums = self["comment_nums"]
    article.url = self["url"]
    article.tags = self["tags"]

    # elasticsearch-dsl takes the document ``_id`` from ``meta.id``; a plain
    # ``id`` attribute is only stored as an ordinary body field. Setting
    # ``meta.id`` makes a re-crawl of the same URL overwrite the existing
    # document instead of creating a duplicate under a generated id. The
    # body field is still written so existing readers of ``id`` keep working.
    article.meta.id = self["url_object_id"]
    article.id = self["url_object_id"]

    # Build the search-suggestion entries.
    article.suggest = gen_suggests(
        ArticleType._doc_type.index,
        ((article.title, 10), (article.tags, 7)),
    )
    article.save()

    # Bump the indexed-document counter.
    redis_cli.incr("cnblogs_count")
def save_to_es(self):
    """Build an ``ArticleType`` document from this item and save it.

    ``front_image_path`` is copied only when the item contains it, and
    ``praise_nums`` falls back to 0 when the item does not contain it.
    The document id is taken from ``url_object_id``. After the save the
    ``jobbole_count`` counter in Redis is incremented.
    """
    doc = ArticleType()

    for key in ("title", "create_date"):
        setattr(doc, key, self[key])
    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]

    doc.praise_nums = self["praise_nums"] if "praise_nums" in self else 0
    for key in ("fav_nums", "comment_nums", "url", "tags"):
        setattr(doc, key, self[key])
    doc.meta.id = self["url_object_id"]

    # Suggestion input: title weighted 10, tags weighted 7.
    index_name = ArticleType._doc_type.index
    weighted_sources = ((doc.title, 10), (doc.tags, 7))
    doc.suggest = gen_suggests(index_name, weighted_sources)

    doc.save()
    redis_cli.incr("jobbole_count")
def save_to_es(self):
    """Index this item into Elasticsearch as an ``ArticleType`` document.

    The content is stored with markup removed by ``remove_tags``. The
    previous version assigned the raw ``content`` a second time further
    down, which silently discarded the stripped text; that overwrite has
    been removed.

    ``front_image_path`` is copied only when the item contains it. When
    ``praise_number`` is missing (``KeyError``) a message is printed and
    the sentinel value 99999 is stored instead; any other error now
    propagates instead of being swallowed by a bare ``except``.

    The document id is taken from ``url_object_id`` and the suggestion
    data is built from the title (weight 10) and the tags (weight 7).
    """
    article = ArticleType()
    article.title = self['title']
    article.create_date = self['create_date']
    article.content = remove_tags(self['content'])
    article.front_image_url = self['front_image_url']
    if 'front_image_path' in self:
        article.front_image_path = self['front_image_path']

    try:
        article.praise_number = self["praise_number"]
    except KeyError:
        # Field absent on the item: report it and keep the sentinel value.
        print("items出错")
        article.praise_number = 99999

    article.comment_nums = self['comment_nums']
    article.tags = self['tags']
    article.fav_nums = self['fav_nums']
    article.url = self['url']
    article.meta.id = self['url_object_id']

    # Build the search-suggestion entries from the title and the tags.
    article.suggest = gen_suggests(
        ArticleType._doc_type.index,
        ((article.title, 10), (article.tags, 7)),
    )
    article.save()
def save_to_es(self):
    """Save this item's ``url``, ``imge`` and ``content`` as an ``ArticleType``.

    The suggestion data is produced by ``test_suggests`` from the item's
    content; the weighted ``gen_suggests`` call over title and tags is not
    used by this method.
    """
    doc = ArticleType()
    for key in ("url", "imge", "content"):
        setattr(doc, key, self[key])

    doc.suggest = test_suggests(self["content"])
    doc.save()
def save_to_es(self):
    """Copy this item's fields onto an ``ArticleType`` document and save it.

    The content is passed through ``remove_tags`` first,
    ``front_image_path`` is copied only when the item contains it, and the
    document id is taken from ``url_object_id``. No suggestion data is
    generated here.
    """
    doc = ArticleType()

    for name in ("title", "create_date"):
        setattr(doc, name, self[name])
    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]

    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]

    for name in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
        setattr(doc, name, self[name])

    doc.meta.id = self["url_object_id"]
    doc.save()
def save_to_es(self):
    """Save this item as an ``ArticleType`` document.

    The document's ``content`` is the item's ``contents`` field passed
    through ``remove_tags``. ``front_img_path`` is copied only when the
    item contains it. The suggestion data is built from the title
    (weight 10) and the tags (weight 7).
    """
    doc = ArticleType()

    for name in ("title", "create_date"):
        setattr(doc, name, self[name])
    doc.content = remove_tags(self["contents"])
    doc.front_img_url = self["front_img_url"]

    if "front_img_path" in self:
        doc.front_img_path = self["front_img_path"]

    for name in ("store", "zan", "comments", "url", "tags", "url_md5"):
        setattr(doc, name, self[name])

    index_name = ArticleType._doc_type.index
    weighted_sources = ((doc.title, 10), (doc.tags, 7))
    doc.suggest = gen_suggests(index_name, weighted_sources)
    doc.save()
def save_to_es(self):
    """Save this item as an ``ArticleType`` document with suggestion data.

    Every listed field is copied from the item unchanged; the suggestion
    data is then built from the title (weight 10) and the tags (weight 7).
    """
    copied_fields = (
        "title",
        "create_date",
        "content",
        "url",
        "tags",
        "fav_nums",
        "praise_nums",
        "comment_nums",
    )

    doc = ArticleType()
    for name in copied_fields:
        setattr(doc, name, self[name])

    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))
    doc.save()
def process_item(self, item, spider):
    """Convert ``item`` into an ``ArticleType`` document and save it.

    The content is passed through ``remove_tags``, ``front_image_path`` is
    copied only when the item contains it, the document id is taken from
    ``url_object_id`` and the suggestion data is built from the title
    (weight 10) and the tags (weight 7). ``spider`` is not used.

    Returns the same ``item`` object that was passed in.
    """
    doc = ArticleType()

    def copy_fields(*names):
        # Copy each named field from the item onto the document, in order.
        for name in names:
            setattr(doc, name, item[name])

    copy_fields("title", "create_date")
    doc.content = remove_tags(item["content"])
    copy_fields("front_image_url")
    if "front_image_path" in item:
        copy_fields("front_image_path")
    copy_fields("praise_nums", "fav_nums", "comment_nums", "url", "tags")
    doc.meta.id = item["url_object_id"]

    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))
    doc.save()

    return item
def save_to_es(self):
    """Convert this article item into an ``ArticleType`` document and save it.

    The content is passed through ``remove_tags`` and ``front_image_path``
    is copied only when the item contains it. The document id is taken
    from ``url_object_id``; the suggestion data is built from the title
    (weight 10) and the tags (weight 7).
    """
    doc = ArticleType()

    for field in ("title", "create_date"):
        setattr(doc, field, self[field])
    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]

    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]

    for field in ("comments_nums", "praise_nums", "fav_nums", "url", "tags"):
        setattr(doc, field, self[field])
    doc.meta.id = self["url_object_id"]

    index_name = ArticleType._doc_type.index
    weighted_sources = ((doc.title, 10), (doc.tags, 7))
    doc.suggest = gen_suggests(index_name, weighted_sources)
    doc.save()
def process_item(self, item, spider):
    """Convert ``item`` into an ``ArticleType`` document and save it.

    The content is passed through ``remove_tags``, stripped of leading and
    trailing whitespace, and has every ``"\\r\\n"`` and tab removed.
    ``front_image_path`` is not copied onto the document. The suggestion
    data comes from ``self.gen_suggests(title, tags)`` and is stored as
    ``title_suggest``. After the save the ``jobbole_count`` counter in
    Redis is incremented. ``spider`` is not used.

    Returns the same ``item`` object that was passed in.
    """
    article = ArticleType()
    article.title = item["title"]
    article.create_date = item["create_date"]

    plain_text = remove_tags(item["content"]).strip()
    article.content = plain_text.replace("\r\n", "").replace("\t", "")

    article.front_image_url = item["front_image_url"]
    article.praise_nums = item["praise_nums"]
    article.comment_nums = item["comment_nums"]
    article.fav_nums = item["fav_nums"]
    article.url = item["url"]
    article.tags = item["tags"]

    # elasticsearch-dsl takes the document ``_id`` from ``meta.id``; a plain
    # ``id`` attribute is only stored as an ordinary body field. Setting
    # ``meta.id`` makes a re-crawl of the same URL overwrite the existing
    # document instead of creating a duplicate under a generated id. The
    # body field is still written so existing readers of ``id`` keep working.
    article.meta.id = item["url_object_id"]
    article.id = item["url_object_id"]

    article.title_suggest = self.gen_suggests(article.title, article.tags)
    article.save()

    redis_cli.incr("jobbole_count")
    return item
def save_to_es(self):
    """Convert this item into an ``ArticleType`` document and save it.

    The content is passed through ``remove_tags`` and ``front_image_path``
    is copied only when the item contains it. The document id is taken
    from ``url_object_id``. The suggestion data is built by
    ``gen_suggests`` against the index named by
    ``ArticleType._index._name``, from the title (weight 10) and the tags
    (weight 7).
    """
    doc = ArticleType()

    leading_fields = ("title", "create_date")
    trailing_fields = ("praise_nums", "fav_nums", "comment_nums", "url", "tags")

    for field in leading_fields:
        setattr(doc, field, self[field])
    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]
    for field in trailing_fields:
        setattr(doc, field, self[field])
    doc.meta.id = self["url_object_id"]

    index_name = ArticleType._index._name
    weighted_sources = ((doc.title, 10), (doc.tags, 7))
    doc.suggest = gen_suggests(index_name, weighted_sources)
    doc.save()
def save_to_es(self):
    """Save this item as an ``ArticleType`` document and count it in Redis.

    The content is passed through ``remove_tags`` and ``front_image_path``
    is copied only when the item contains it. The document id is taken
    from ``url_object_id``; the suggestion data is built from the title
    (weight 10) and the tags (weight 7). After the save the
    ``jobbole_count`` counter in Redis is incremented.
    """
    doc = ArticleType()

    def copy_fields(*names):
        # Copy each named field from the item onto the document, in order.
        for name in names:
            setattr(doc, name, self[name])

    copy_fields("title", "create_date")
    doc.content = remove_tags(self["content"])
    copy_fields("front_image_url")
    if "front_image_path" in self:
        copy_fields("front_image_path")
    copy_fields("praise_nums", "fav_nums", "comment_nums", "url", "tags")
    doc.meta.id = self["url_object_id"]

    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

    doc.save()
    redis_cli.incr("jobbole_count")
def save_to_es(self):
    """Convert this item into an ``ArticleType`` document and save it.

    The content is passed through ``remove_tags``. ``url_object_id`` is
    used as the Elasticsearch document id. The suggestion data is
    produced by ``gen_suggests(index, info_tuple)`` from the title
    (weight 10) and the tags (weight 7).
    """
    doc = ArticleType()

    for field in ("title", "create_date"):
        setattr(doc, field, self[field])
    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]

    # front_image_path may not exist on the item.
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]

    for field in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
        setattr(doc, field, self[field])

    # Use url_object_id as the Elasticsearch document id.
    doc.meta.id = self["url_object_id"]

    # Set the search-suggestion value.
    index_name = ArticleType._doc_type.index
    info_tuple = ((doc.title, 10), (doc.tags, 7))
    doc.suggest = gen_suggests(index_name, info_tuple)

    doc.save()