def save_to_es(self):
    """Build a LagouJobIndex document from this item and save it.

    ``self.clean_data()`` runs first. Item fields are then copied onto the
    document, with ``url_object_id`` used as the document id and
    ``job_desc`` stripped of tags, surrounding whitespace, CRLF pairs and
    tabs before being stored.
    """
    self.clean_data()

    doc = LagouJobIndex()
    for field in ("title", "url"):
        setattr(doc, field, self[field])
    doc.meta.id = self["url_object_id"]
    for field in (
        "salary_min",
        "salary_max",
        "job_city",
        "work_years_min",
        "work_years_max",
        "degree_need",
    ):
        setattr(doc, field, self[field])

    # Description: drop markup, trim, then remove CRLF pairs and tabs.
    plain_desc = remove_tags(self["job_desc"]).strip()
    doc.job_desc = plain_desc.replace("\r\n", "").replace("\t", "")

    for field in (
        "job_advantage",
        "tags",
        "job_type",
        "publish_time",
        "job_addr",
        "company_name",
        "company_url",
        "crawl_time",
    ):
        setattr(doc, field, self[field])

    # (value, number) pairs handed to generate_suggests; the numbers are
    # presumably per-field weights -- confirm against generate_suggests.
    suggest_sources = (
        (doc.title, 10),
        (doc.tags, 7),
        (doc.job_advantage, 6),
        (doc.job_desc, 3),
        (doc.job_addr, 5),
        (doc.company_name, 8),
        (doc.degree_need, 4),
        (doc.job_city, 9),
    )
    doc.suggest = generate_suggests(es_lagou_job, suggest_sources)

    # Called before the save, exactly as the original ordering did.
    real_time_count('lagou_job_count', JOB_COUNT_INIT)
    doc.save()
def save_to_es(self):
    """Build a ZhiHuAnswerIndex document from this item and save it.

    ``self.clean_data()`` runs first; ``url_object_id`` becomes the
    document id and the remaining item fields are copied across unchanged.
    """
    self.clean_data()

    doc = ZhiHuAnswerIndex()
    doc.meta.id = self["url_object_id"]
    for field in (
        "answer_id",
        "question_id",
        "author_id",
        "author_name",
        "content",
        "praise_num",
        "comments_num",
        "url",
        "create_time",
        "update_time",
        "crawl_time",
    ):
        setattr(doc, field, self[field])

    # The suggest data is attached at the time the document is saved.
    suggest_sources = ((doc.author_name, 10), (doc.content, 7))
    doc.suggest = generate_suggests(es_zhihu_answer, suggest_sources)

    # NOTE(review): the "zhihu_answer_count" key is paired with
    # ZHIHU_QUESTION_COUNT_INIT -- confirm this is intentional.
    real_time_count("zhihu_answer_count", ZHIHU_QUESTION_COUNT_INIT)
    doc.save()
def save_to_es(self):
    """Build a ZhiHuQuestionIndex document from this item and save it.

    ``self.clean_data()`` runs first; ``url_object_id`` becomes the
    document id and the remaining item fields are copied across unchanged.
    """
    self.clean_data()

    doc = ZhiHuQuestionIndex()
    doc.meta.id = self["url_object_id"]
    for field in (
        "question_id",
        "title",
        "content",
        "topics",
        "answer_num",
        "comments_num",
        "watch_user_num",
        "click_num",
        "url",
        "crawl_time",
    ):
        setattr(doc, field, self[field])

    # The suggest data is attached at the time the document is saved.
    suggest_sources = (
        (doc.title, 10),
        (doc.topics, 7),
        (doc.content, 5),
    )
    doc.suggest = generate_suggests(es_zhihu_question, suggest_sources)

    real_time_count('zhihu_question_count', ZHIHU_QUESTION_COUNT_INIT)
    doc.save()
def save_to_es(self):
    """Save this Jobbole article to ES.

    ``self.clean_data()`` runs first. Item fields are copied onto a
    JobboleBlogIndex document, ``content`` is passed through
    ``remove_tags`` before being stored, and ``url_object_id`` is used as
    the document id.
    """
    self.clean_data()

    doc = JobboleBlogIndex()
    for field in ("title", "create_date"):
        setattr(doc, field, self[field])
    doc.content = remove_tags(self["content"])
    for field in (
        "front_image_url",
        "praise_nums",
        "fav_nums",
        "comment_nums",
        "url",
        "tags",
    ):
        setattr(doc, field, self[field])
    doc.meta.id = self["url_object_id"]

    # The suggest data must be supplied when the document is saved.
    suggest_sources = (
        (doc.title, 10),
        (doc.tags, 6),
        (doc.content, 4),
    )
    doc.suggest = generate_suggests(es_jobbole_blog, suggest_sources)

    real_time_count('jobbole_blog_count', JOBBOLE_COUNT_INIT)
    doc.save()