def save_to_es(self):
        """Copy this item's job fields onto a LagouJobIndex document and save it.

        Runs ``self.clean_data()`` first, then fills the document field by
        field, attaches weighted suggestions produced by
        ``generate_suggests``, calls ``real_time_count`` for the
        ``lagou_job_count`` key and finally calls ``save()`` on the document.
        """
        self.clean_data()
        doc = LagouJobIndex()

        for field in ("title", "url"):
            setattr(doc, field, self[field])

        # The document id is taken from the item's url_object_id value.
        doc.meta.id = self["url_object_id"]

        for field in ("salary_min", "salary_max", "job_city",
                      "work_years_min", "work_years_max", "degree_need"):
            setattr(doc, field, self[field])

        # Run the description through remove_tags, trim it, then drop
        # CRLF pairs and tab characters.
        plain_desc = remove_tags(self["job_desc"]).strip()
        doc.job_desc = plain_desc.replace("\r\n", "").replace("\t", "")

        for field in ("job_advantage", "tags", "job_type", "publish_time",
                      "job_addr", "company_name", "company_url",
                      "crawl_time"):
            setattr(doc, field, self[field])

        # (value, weight) pairs handed to generate_suggests.
        weighted_sources = (
            (doc.title, 10),
            (doc.tags, 7),
            (doc.job_advantage, 6),
            (doc.job_desc, 3),
            (doc.job_addr, 5),
            (doc.company_name, 8),
            (doc.degree_need, 4),
            (doc.job_city, 9),
        )
        doc.suggest = generate_suggests(es_lagou_job, weighted_sources)

        real_time_count('lagou_job_count', JOB_COUNT_INIT)
        doc.save()
Exemplo n.º 2
0
    def save_to_es(self):
        """Copy this item's answer fields onto a ZhiHuAnswerIndex document and save it.

        Runs ``self.clean_data()`` first, then fills the document, attaches
        weighted suggestions produced by ``generate_suggests``, calls
        ``real_time_count`` for the ``zhihu_answer_count`` key and finally
        calls ``save()`` on the document.
        """
        self.clean_data()
        answer_doc = ZhiHuAnswerIndex()

        # The document id is taken from the item's url_object_id value.
        answer_doc.meta.id = self["url_object_id"]

        for field in ("answer_id", "question_id", "author_id", "author_name",
                      "content", "praise_num", "comments_num", "url",
                      "create_time", "update_time", "crawl_time"):
            setattr(answer_doc, field, self[field])

        # Attach the suggest data at save time: (value, weight) pairs.
        weighted_sources = (
            (answer_doc.author_name, 10),
            (answer_doc.content, 7),
        )
        answer_doc.suggest = generate_suggests(es_zhihu_answer,
                                               weighted_sources)

        # NOTE(review): the answer counter is passed the *question* init
        # constant - confirm whether an answer-specific constant was intended.
        real_time_count("zhihu_answer_count", ZHIHU_QUESTION_COUNT_INIT)
        answer_doc.save()
Exemplo n.º 3
0
    def save_to_es(self):
        """Copy this item's question fields onto a ZhiHuQuestionIndex document and save it.

        Runs ``self.clean_data()`` first, then fills the document, attaches
        weighted suggestions produced by ``generate_suggests``, calls
        ``real_time_count`` for the ``zhihu_question_count`` key and finally
        calls ``save()`` on the document.
        """
        self.clean_data()
        question_doc = ZhiHuQuestionIndex()

        # The document id is taken from the item's url_object_id value.
        question_doc.meta.id = self["url_object_id"]

        for field in ("question_id", "title", "content", "topics",
                      "answer_num", "comments_num", "watch_user_num",
                      "click_num", "url", "crawl_time"):
            setattr(question_doc, field, self[field])

        # Attach the suggest data at save time: (value, weight) pairs.
        weighted_sources = (
            (question_doc.title, 10),
            (question_doc.topics, 7),
            (question_doc.content, 5),
        )
        question_doc.suggest = generate_suggests(es_zhihu_question,
                                                 weighted_sources)

        real_time_count('zhihu_question_count', ZHIHU_QUESTION_COUNT_INIT)
        question_doc.save()
Exemplo n.º 4
0
 def save_to_es(self):
     """Save a Jobbole blog article to ES.

     Runs ``self.clean_data()`` first, then fills a JobboleBlogIndex
     document from this item, attaches weighted suggestions produced by
     ``generate_suggests``, calls ``real_time_count`` for the
     ``jobbole_blog_count`` key and finally calls ``save()`` on the
     document.
     """
     self.clean_data()
     article = JobboleBlogIndex()

     for field in ('title', "create_date"):
         setattr(article, field, self[field])

     # The content is passed through remove_tags before being stored.
     article.content = remove_tags(self["content"])

     for field in ("front_image_url", "praise_nums", "fav_nums",
                   "comment_nums", "url", "tags"):
         setattr(article, field, self[field])

     # The document id is taken from the item's url_object_id value.
     article.meta.id = self["url_object_id"]

     # suggest must be supplied when the data is saved:
     # (value, weight) pairs handed to generate_suggests.
     weighted_sources = (
         (article.title, 10),
         (article.tags, 6),
         (article.content, 4),
     )
     article.suggest = generate_suggests(es_jobbole_blog, weighted_sources)

     real_time_count('jobbole_blog_count', JOBBOLE_COUNT_INIT)
     article.save()