示例#1
0
 def retrieve(self, request, *args, **kwargs):
     """Export a novel as one ``.txt`` file and hand it to the download helper.

     Reads ``novel_id`` from the query string. The export file lives at
     ``BASE_DIR + "/novel/" + <novel name> + ".txt"``; it is generated on
     the first request and reused while it exists on disk.

     Returns:
         ``SuccessHR("不存在该小说")`` when no active novel matches
         ``novel_id``; otherwise the value returned by
         ``self.big_file_download``.
     """
     novel_id = request.query_params.get("novel_id")
     try:
         novel = NovelEntry.objects.get(is_active=True, id=novel_id)
     except NovelEntry.DoesNotExist:
         return SuccessHR("不存在该小说")

     file_name = novel.name + ".txt"
     file_path = BASE_DIR + "/novel/" + file_name
     if not os.path.exists(file_path):
         # Chapters in reading order; select_related loads each chapter's
         # section in the same query instead of one query per chapter.
         sections = SectionContent.objects.filter(
             is_active=True,
             novel=novel).select_related("section").order_by("section__order")
         # Write to a temporary file and rename it into place only once it
         # is complete, so an interrupted export never leaves a truncated
         # file that later requests would serve as the cached result.
         tmp_path = file_path + ".tmp"
         try:
             with open(tmp_path, "w", encoding="utf-8") as f:
                 for i in sections:
                     log_common.info(msg=f"写入{i.section.name}")
                     f.write('\n' + i.section.name + '\n')
                     f.write(i.content)
             os.replace(tmp_path, file_path)
         finally:
             # After a successful rename the temp file is gone; this only
             # cleans up when writing failed part-way.
             if os.path.exists(tmp_path):
                 os.remove(tmp_path)
         log_common.info(msg="写入完成")
     return self.big_file_download(file_name=file_name, file_path=file_path)
示例#2
0
    def list(self, request, *args, **kwargs):
        """Return the filtered queryset, paginated when pagination applies.

        When ``self.paginate_queryset`` yields a page, that page is serialized
        and returned through ``self.get_paginated_response``; otherwise the
        whole filtered queryset is serialized and wrapped in ``SuccessHR``.
        """
        filtered = self.filter_queryset(self.get_queryset())
        current_page = self.paginate_queryset(filtered)

        if current_page is None:
            # No page was produced: serialize everything in one response.
            full_serializer = self.get_serializer(filtered, many=True)
            return SuccessHR(full_serializer.data)

        page_serializer = self.get_serializer(current_page, many=True)
        return self.get_paginated_response(page_serializer.data)
示例#3
0
    def list(self, request, *args, **kwargs):
        """Scrape a book's chapter list from ``url`` and store it.

        Query parameters:
            url: page to fetch (required).
            host: optional; narrows the grasp-rule lookup with
                ``host__contains``.
            book_name: passed to ``self.get_novel_entry`` to find the book.

        Nodes matched by the rule's ``list_rule`` are walked in order. A node
        whose ``class`` equals ``section_rule_p`` starts a new parent section
        and resets the chapter counter; every other node is searched with
        ``section_rule`` for a chapter link.

        Returns:
            ``ErrorHR`` when ``url`` is missing, the book is not found, or no
            grasp rule matches; ``SuccessHR("创建成功")`` otherwise.
        """
        host = request.query_params.get("host")
        url = request.query_params.get("url")
        book_name = request.query_params.get("book_name")
        if not url:
            return ErrorHR("参数url缺失")
        if host:
            self.query_sql &= Q(host__contains=host)
        book = self.get_novel_entry(book_name=book_name)
        if not book:
            return ErrorHR("不存在该书")
        # Look up the scraping rule; .first() yields None when nothing
        # matches, which previously crashed on the attribute reads below.
        rule = GraspRule.objects.filter(self.query_sql).first()
        if rule is None:
            return ErrorHR("该网站不存在抓取规则")
        list_rule = rule.list_rule
        section_rule_p = rule.section_rule_p
        section_rule = rule.section_rule

        res = requests_get(url=url, decode=rule.decode)
        parse_html = html_to_etree(res)
        section_p_obj = None
        need_add_obj = []
        order = 0
        for node in parse_html.xpath(list_rule):
            is_parent = (section_rule_p is not None
                         and node.attrib.get("class") == section_rule_p)
            if is_parent:
                order = 0
                # Flush chapters collected under the previous parent before
                # switching to the new one.
                if need_add_obj:
                    NovelSection.objects.bulk_create(need_add_obj)
                    need_add_obj.clear()
                section_p_obj = self.create_section(novel=book,
                                                    name=node.text)
                continue

            # Ordinary entry: the position counter advances even when no
            # link is found in the node, as before.
            order += 1
            anchors = node.xpath(section_rule)
            if not anchors:
                continue
            anchor = anchors[0]
            hrefs = anchor.xpath("./@href")
            if not hrefs:
                # An anchor without an href cannot be fetched later; skip it
                # instead of failing the whole import with IndexError.
                continue
            need_add_obj.append(
                NovelSection(novel=book,
                             name=anchor.text,
                             url=hrefs[0],
                             parent=section_p_obj,
                             order=order))
        # Flush whatever is left after the last parent section.
        if need_add_obj:
            NovelSection.objects.bulk_create(need_add_obj)
        return SuccessHR("创建成功")
示例#4
0
文件: views.py 项目: MAOA-L/Blog
    def list(self, request, *args, **kwargs):
        """Return two hard-coded text excerpts as ``title``/``content`` dicts."""
        first_text = "我怀着饥饿感寻找家, 不清楚家和饥饿感两者 究竟谁是谁的代名词。 我想我即将和父亲对饮 杯中的浓茶,一如往常, 茶水浓腻的涡旋让我 分不清所处的时光,五岁 或者二十五岁,父亲或许 尚未苍老,我并未长大。 父亲不善言辞,惯于沉默, 戒烟前香烟代表他的情愫。 餐桌上我会揶揄他的厨艺, 他始终笨拙地学不会翻炒, 而我也尝不惯杯中的浓茶。 茶水的苦味在我年轻的时岁 被舌尖放大,仿佛生活的网。 而我已沉默多年,并未想清楚 如何在父亲身上原谅我,或者 如何从我身上理解我的父亲。"
        second_text = "走在路上我也是一个生动的人 我的头发茂盛像青草 像来自遥远,野外的处女地 胳膊有力地摆动。嘴角 含笑 一点点微微的倔强 "

        # (title, full text, number of leading characters to expose)
        excerpts = (
            ("记", first_text, 40),
            ("随手", second_text, 45),
        )
        payload = [
            {"title": title, "content": text[:limit]}
            for title, text, limit in excerpts
        ]
        return SuccessHR(payload)
示例#5
0
 def post(self, request, *args, **kwargs):
     """Register a book from ``url`` and fetch its chapter list if needed.

     Request data:
         url: address of the novel (required).
         host: passed to ``self._search_rule`` to find the scraping rule.

     Returns:
         ``ErrorHR`` when ``url`` is missing or no rule is found for the
         host; ``SuccessHR("success")`` otherwise.
     """
     url = request.data.get("url")
     host = request.data.get("host")
     # Reject a missing url up front instead of passing None to the fetch
     # helpers.
     if not url:
         return ErrorHR("参数url缺失")
     rule = self._search_rule(url=host)
     if not rule:
         return ErrorHR("该网站不存在抓取规则")
     # Get the book entity for this url.
     book = self._get_book(url=url, rule=rule.book_name, decode=rule.decode)
     if not book.section_complete:
         # Chapter list not stored yet: fetch it.
         self._get_sections(url=url,
                            list_rule=rule.list_rule,
                            section_rule=rule.section_rule,
                            book=book,
                            decode=rule.decode)
     # TODO: nothing is done yet for books whose content_complete flag is
     # false; the original branch for that case was an empty placeholder.
     return SuccessHR("success")
示例#6
0
文件: pagination.py 项目: MAOA-L/Blog
    def get_paginated_response(self, data):
        """Wrap ``data`` in the pagination envelope and return ``SuccessHR``.

        The envelope is an ``OrderedDict`` with the keys ``pageNo``,
        ``pageSize``, ``total`` and ``list``, in that order. When the
        paginator has no ``page`` attribute, a placeholder envelope with an
        empty list is returned instead.
        """
        content = OrderedDict()
        if hasattr(self, 'page'):
            current = self.page
            content['pageNo'] = current.number
            content['pageSize'] = current.paginator.per_page
            content['total'] = current.paginator.count
            content['list'] = data
        else:
            # TODO: self.page does not exist here; the values below are
            # placeholders and need to be obtained some other way.
            content['pageNo'] = 0
            content['pageSize'] = self.page_size
            content['total'] = 0
            content['list'] = []

        return SuccessHR(content)
示例#7
0
    def retrieve(self, request, *args, **kwargs):
        """Pass the ``code`` query parameter to ``get_code2session``.

        The helper's result is not used; the response is always
        ``SuccessHR({"name": "success"})`` unless the helper raises.
        """
        code = request.query_params.get("code")
        # The leftover debug print of `code` was removed: it is a
        # client-supplied exchange code (presumably a login credential --
        # confirm) and should not be written to stdout.
        get_code2session(jscode=code)

        return SuccessHR({"name": "success"})
示例#8
0
 def list(self, request, *args, **kwargs):
     """Return an empty list wrapped in ``SuccessHR``."""
     no_items = []
     return SuccessHR(no_items)
示例#9
0
 def list(self, request, *args, **kwargs):
     """Fetch and store the content of every chapter not yet crawled.

     Reads ``novel_id`` from the query string, looks up the novel and its
     grasp rule, then fetches the chapters that have no active
     ``SectionContent`` row in batches of ``batch_size`` via
     ``get_content``. Each batch is saved with ``bulk_create``. Finally the
     novel is flagged ``content_complete``.

     Returns:
         ``ErrorHR`` when ``novel_id`` is missing or no grasp rule exists;
         ``SuccessHR`` with a status message otherwise. This includes the
         "novel not found" and "interrupted" cases, which keep their
         original response type.
     """
     # TODO: also pick out chapters whose content needs to be re-fetched.
     novel_id = request.query_params.get("novel_id")
     if not novel_id:
         return ErrorHR("请选择小说")
     batch_size = 10
     try:
         novel = NovelEntry.objects.get(is_active=True, id=novel_id)
         host = novel.host
         url = novel.url
         # Look up the scraping rule for this novel.
         try:
             g_rule = GraspRule.objects.get(is_active=True,
                                            host=host,
                                            service=url)
         except GraspRule.DoesNotExist:
             return ErrorHR("不存在该小说的爬取规则配置")
         content_rule = g_rule.content_rule
         # Chapters that already have stored content are skipped.
         crawled_ids = list(
             SectionContent.objects.filter(
                 is_active=True,
                 novel=novel).values_list("section_id", flat=True))
         n_sections = NovelSection.objects.filter(
             is_active=True, novel=novel).exclude(id__in=crawled_ids)
         n_sections_list = list(
             GetNovelSectionsSerializer(n_sections, many=True).data)
         # Walk the pending chapters in fixed-size batches. The final batch
         # may be shorter than batch_size and is processed too; previously
         # a trailing partial batch was silently dropped while the novel
         # was still marked complete.
         for start in range(0, len(n_sections_list), batch_size):
             # get_content is still given a deque, as before.
             section_deque = deque(
                 n_sections_list[start:start + batch_size],
                 maxlen=batch_size)
             result = get_content(sections=section_deque,
                                  host=host,
                                  content_rule=content_rule,
                                  decode=g_rule.decode)
             need_add_obj = [
                 SectionContent(novel=novel,
                                section_id=j.get("id"),
                                content=j.get("content"))
                 for j in result
             ]
             if need_add_obj:
                 SectionContent.objects.bulk_create(need_add_obj)
             log_common.info(msg="===清空队列===")
         novel.content_complete = True
         novel.save()
         return SuccessHR("爬取成功")
     except NovelEntry.DoesNotExist:
         return SuccessHR("不存在该小说")
     except Exception as ex:
         # Best-effort crawl: any failure is logged and the caller is told
         # it can start the job again; batches already saved are skipped on
         # the next run.
         log_common.error(ex)
         return SuccessHR("中断,可再次开启~")
示例#10
0
 def create(self, request, *args, **kwargs):
     """Validate the request payload, create the object, and return its data.

     Validation uses ``raise_exception=True``, so invalid input raises
     instead of returning.
     """
     payload_serializer = self.get_serializer(data=request.data)
     payload_serializer.is_valid(raise_exception=True)
     self.perform_create(payload_serializer)
     created = payload_serializer.data
     return SuccessHR(created)
示例#11
0
 def retrieve(self, request, *args, **kwargs):
     """Serialize the object from ``self.get_object()`` into ``SuccessHR``."""
     target = self.get_object()
     serialized = self.get_serializer(target).data
     return SuccessHR(serialized)