Exemplo n.º 1
0
    def get_one_suite_and_download(self):
        """Analyse one suite, run the download workers, then flag it complete.

        Calls ``get_suite_pages_and_start_threads`` first and stops right away
        when that call returns ``False``. Otherwise it starts
        ``max_download_worker`` threads running ``download_images_to_local``,
        waits for all of them, and marks the matching incomplete
        ``DownloadedSuite`` row as complete when ``get_local_suite_count`` for
        its folder equals its ``max_page``.

        Returns:
            Always ``None``.
        """
        # The page-analysis step returns False for a duplicate suite and
        # None in every other case.
        if self.get_suite_pages_and_start_threads() is False:
            return

        time.sleep(3)

        # Opening the image folder in the file explorer is disabled:
        # os.system("start explorer {}".format(settings.IMAGE_FOLDER.replace('/', '\\')))

        # TODO: the flow could hand tasks to gevent as soon as each page is
        #       parsed; a bool could control whether to download, removing the
        #       need for a separate download routine.
        # TODO: gevent is not used for now because it causes a stack overflow.
        workers = []
        for _ in range(self.max_download_worker):
            worker = threading.Thread(target=self.download_images_to_local)
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        # Once downloading is done, set the suite's completeness flag to True.
        pending_suite = DownloadedSuite.objects.filter(
            url=self.suite_url, is_complete=False).first()
        if pending_suite is None:
            return

        if pending_suite.max_page != get_local_suite_count(
                pending_suite.get_suite_folder_path()):
            return

        pending_suite.is_complete = True
        pending_suite.save()
Exemplo n.º 2
0
 def check_and_mark_suite_complete(self):
     """Verify whether the suite finished downloading and persist the result.

     Sets ``self.suite_obj.is_complete`` to whether
     ``get_local_suite_count(self.folder)`` equals ``self.suite_obj.max_page``,
     then saves ``self.suite_obj``.

     Raises:
         AssertionError: if ``self.folder`` is ``None``.
     """
     assert self.folder is not None
     downloaded = get_local_suite_count(self.folder)
     suite = self.suite_obj
     suite.is_complete = downloaded == suite.max_page
     suite.save()
Exemplo n.º 3
0
    async def callback_downloadedsuite(self, suite_id):
        """Push download progress for one suite until its download finishes.

        Looks up the ``DownloadedSuite`` with ``id=suite_id`` and polls
        ``get_local_suite_count`` for its folder about once per second.
        Whenever the count differs from the previous poll, the serialized
        suite is sent with ``self.write_message``. When the local count
        reaches ``suite.max_page`` the connection is closed.

        Args:
            suite_id: Value matched against ``DownloadedSuite.id``.

        Returns:
            ``None``.
        """
        suite = DownloadedSuite.objects.filter(id=suite_id).first()
        if suite is None:
            # ``first()`` yields None when no row matches; without this guard
            # the attribute access below would raise AttributeError.
            self.close(reason='suite not found')
            return

        last_local_files_count = 0
        while True:
            local_files_count = get_local_suite_count(
                suite.get_suite_folder_path())

            print(last_local_files_count, local_files_count)
            if last_local_files_count != local_files_count:
                # Only send when two consecutive checks saw different counts.
                serializer = MzituDownloadedSuiteSerializer(suite)
                message = JSONRenderer().render(serializer.data)
                await self.write_message(message)
            await gen.sleep(1)
            last_local_files_count = local_files_count

            # ``>=`` rather than ``==``: a folder holding more files than
            # ``max_page`` would otherwise keep this loop polling forever.
            if local_files_count >= suite.max_page:
                break
        self.close(reason='下载完成')  # close this connection
        return
Exemplo n.º 4
0
 def get_locals_count(self, obj) -> int:
     """Return the local count for ``obj``'s suite folder.

     Yields 0 when ``obj.get_suite_folder_path()`` is falsy; otherwise
     returns ``get_local_suite_count`` of that path.
     """
     folder = obj.get_suite_folder_path()
     return get_local_suite_count(folder) if folder else 0
Exemplo n.º 5
0
    def get_suite_pages_and_start_threads(self):
        """Check whether the suite is complete, then analyse each page in a thread.

        Fetches ``self.suite_url``, parses the page count and title, makes
        sure the suite folder exists, and gets or creates the
        ``DownloadedSuite`` row keyed by the title. Tags are attached when the
        suite has none. For a row that already existed, the stored
        ``max_page`` is used and the suite is skipped when both the local file
        count and the ``SuiteImageMap`` row count show it complete. Otherwise
        one thread per page runs ``get_one_pic_url`` and all are joined.

        Returns:
            ``False`` when the suite was already recorded and is complete;
            ``None`` after the page threads have finished.
        """
        html = proxy_request(self.suite_url)

        max_page_num = self.parse_max_page_num_of_suite(html)
        title = self.parse_suite_title(html)
        logger.debug(title)

        suite_folder = os.path.join(settings.IMAGE_FOLDER_MZITU, title)
        if not os.path.isdir(suite_folder):
            # Create the suite folder.
            os.makedirs(suite_folder, exist_ok=True)

        suite_instance, is_created = DownloadedSuite.objects.get_or_create(
            name=title,
            defaults={'url': self.suite_url, 'max_page': max_page_num},
        )

        # Collect the tags only when the suite has none attached yet.
        if not suite_instance.tags.all():
            tag_instances = [
                Tag.objects.update_or_create(
                    name=name, defaults={'url': href})[0]
                for href, name in self.parse_tags_of_suite(html)
            ]
            suite_instance.tags.set(tag_instances)

        if is_created is False:
            # The DB already has a record, so check whether the local files
            # are complete.
            print("该套牌已在DB中,确认是否存在,确认套图是否完整...")
            max_page_num = suite_instance.max_page
            local_file_total = get_local_suite_count(
                suite_instance.get_suite_folder_path())
            mapped_image_total = SuiteImageMap.objects.filter(
                suite__id=suite_instance.id).count()
            if (local_file_total >= max_page_num
                    and mapped_image_total == suite_instance.max_page):
                # Local file count matches and the image rows are complete.
                print("已完整下载,跳过")
                return False
            print("该套图不完整,重新下载")

        # Analyse every page.
        # TODO: this could be merged with the thread section of
        #       download_one_suite into a single thread launcher.
        workers = []
        for page_no in range(1, max_page_num + 1):
            worker = threading.Thread(
                target=self.get_one_pic_url,
                args=(suite_folder, page_no),
            )
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()
        return