Exemplo n.º 1
0
 def write_remaining_save_data(self):
     """
     Write the save records that have not been processed yet to the
     temporary save file.

     Nothing is written when ``self.save_data`` is empty or when
     ``self.temp_save_data_path`` is falsy.
     """
     if len(self.save_data) == 0 or not self.temp_save_data_path:
         return
     remaining_records = list(self.save_data.values())
     serialized = tool.list_to_string(remaining_records)
     file.write_file(serialized, self.temp_save_data_path)
Exemplo n.º 2
0
def main():
    """
    Merge a temporary save file into an original save file.

    Two file dialogs ask for the original save file and then for the
    temporary save file. Records read from the temporary file override
    records with the same key in the original file; the merged records
    are sorted by key and written back over the original save file.

    Returns without doing anything when either dialog is cancelled or
    when both dialogs select the same file.
    """
    # Root window is created and hidden so only the file dialogs show up
    gui = tkinter.Tk()
    gui.withdraw()
    # Ask for the path of the original save file
    options = {
        "initialdir": os.path.join(os.path.dirname(__file__), "video"),
        "initialfile": "save.data",
        "filetypes": [("data", ".data"), ("all file", "*")],
        "title": "原始存档文件",
    }
    save_data_file_path = tkinter.filedialog.askopenfilename(**options)
    if not save_data_file_path:
        return
    # Ask for the path of the temporary save file
    options["title"] = "临时存档文件"
    temp_save_data_file_path = tkinter.filedialog.askopenfilename(**options)
    # Check the second selection; the first one was already validated above
    if not temp_save_data_file_path:
        return
    if save_data_file_path == temp_save_data_file_path:
        output.print_msg("存档文件相同,无需合并")
        return
    # Read both files, keyed by the column at PRIME_KEY_INDEX
    # (presumably the primary key column; verify in crawler.read_save_data)
    save_data = crawler.read_save_data(save_data_file_path, PRIME_KEY_INDEX)
    temp_save_data = crawler.read_save_data(temp_save_data_file_path,
                                            PRIME_KEY_INDEX)
    # Temporary records win over original records that share a key
    save_data.update(temp_save_data)
    temp_list = [save_data[key] for key in sorted(save_data.keys())]
    file.write_file(tool.list_to_string(temp_list), save_data_file_path,
                    file.WRITE_FILE_TYPE_REPLACE)
Exemplo n.º 3
0
 def rewrite_save_file(self):
     """
     Sort the temporary save file by key and write it over the original
     save file, then delete the temporary file.

     Only supports save files with one record per line where every
     record has the same format. Does nothing when
     ``self.temp_save_data_path`` is falsy.
     """
     if not self.temp_save_data_path:
         return
     records = read_save_data(self.temp_save_data_path, 0, [])
     ordered_records = []
     for record_key in sorted(records.keys()):
         ordered_records.append(records[record_key])
     serialized = tool.list_to_string(ordered_records)
     file.write_file(serialized, self.save_data_path,
                     file.WRITE_FILE_TYPE_REPLACE)
     path.delete_dir_or_file(self.temp_save_data_path)
Exemplo n.º 4
0
def rewrite_save_file(temp_save_data_path: str, save_data_path: str):
    """
    Deprecated: sort the temporary save file by key and write it over
    the original save file, then delete the temporary file.

    Only supports save files with one record per line where every
    record has the same format. Emits a DeprecationWarning attributed
    to the caller.
    """
    warnings.warn("rewrite_save_file commands are deprecated.",
                  DeprecationWarning, stacklevel=2)
    records = read_save_data(temp_save_data_path, 0, [])
    ordered_keys = sorted(records.keys())
    ordered_records = [records[record_key] for record_key in ordered_keys]
    serialized = tool.list_to_string(ordered_records)
    file.write_file(serialized, save_data_path, file.WRITE_FILE_TYPE_REPLACE)
    path.delete_dir_or_file(temp_save_data_path)
Exemplo n.º 5
0
def save_discount_list(discount_list):
    """
    Serialize ``discount_list`` with ``tool.list_to_string`` and write
    the result to ``discount.txt``.
    """
    # Write mode 2 is presumably "replace"; confirm against tool.write_file
    serialized = tool.list_to_string(discount_list, "\n", "")
    tool.write_file(serialized, "discount.txt", 2)
Exemplo n.º 6
0
def rewrite_save_file(temp_save_data_path, save_data_path):
    """
    Sort the records of the temporary save file by key, write them over
    the original save file, then delete the temporary file.
    """
    records = read_save_data(temp_save_data_path, 0, [])
    ordered_records = []
    for record_key in sorted(records.keys()):
        ordered_records.append(records[record_key])
    serialized = tool.list_to_string(ordered_records)
    tool.write_file(serialized, save_data_path, tool.WRITE_FILE_TYPE_REPLACE)
    path.delete_dir_or_file(temp_save_data_path)
Exemplo n.º 7
0
def rewrite_save_file(temp_save_data_path, save_data_path):
    """
    Sort the records of the temporary save file by key, write them to
    the original save file, then remove the temporary file.
    """
    records = read_save_data(temp_save_data_path, 0, [])
    ordered_keys = sorted(records.keys())
    ordered_records = [records[record_key] for record_key in ordered_keys]
    # Write mode 2 is presumably "replace"; confirm against tool.write_file
    serialized = tool.list_to_string(ordered_records)
    tool.write_file(serialized, save_data_path, 2)
    os.remove(temp_save_data_path)
Exemplo n.º 8
0
    def main(self):
        """
        Download new images from jigadori.fkoji.com and update the save file.

        Steps:
        1. Read the save file. The record stored under ALL_SIGN carries the
           running image index and the tweet timestamp of the newest image
           from the previous run; every other record is a per-account counter.
        2. Walk the index pages from page 1 and download every image whose
           tweet timestamp is newer than the saved one, stopping at the
           first older-or-equal tweet or at the first page without photos.
        3. When ``self.is_sort`` is set, wait for manual confirmation, then
           copy the downloaded files into the "all" directory and into one
           "single/<account_id>" directory per account, renumbering them.
        4. Write the updated records back to the save file.

        NOTE(review): this is Python 2 code (``raw_input``, BeautifulSoup 3).
        """
        # Parse the save file
        account_list = robot.read_save_data(self.save_data_path, 0, ["", "", ""])

        # The record under this key holds the overall totals
        if ALL_SIGN in account_list:
            image_start_index = int(account_list[ALL_SIGN][1])
            save_data_image_time = int(account_list[ALL_SIGN][2])
            account_list.pop(ALL_SIGN)
        else:
            image_start_index = 0
            save_data_image_time = 0

        if self.is_sort:
            image_path = self.image_temp_path
        else:
            image_path = self.image_download_path

        if not tool.make_dir(image_path, 0):
            # Report the directory that actually failed to be created
            self.print_msg("图片下载目录%s创建失败!" % image_path)
            tool.process_exit()

        # Download
        page_index = 1
        image_count = 1
        first_image_time = 0
        # Image URLs already handled in this run (membership tests only)
        seen_image_urls = set()
        is_over = False

        while not is_over:
            index_url = "http://jigadori.fkoji.com/?p=%s" % page_index
            index_page_return_code, index_page_response = tool.http_request(index_url)[:2]
            if index_page_return_code != 1:
                log.error("无法访问首页地址 %s" % index_url)
                tool.process_exit()

            index_page = BeautifulSoup.BeautifulSoup(index_page_response)
            photo_list = index_page.body.findAll("div", "photo")
            # Already past the last page
            if not photo_list:
                break
            for photo_info in photo_list:
                if isinstance(photo_info, BeautifulSoup.NavigableString):
                    continue

                # Parse the tweet creation timestamp from the photo block
                tweet_created_time = get_tweet_created_time(photo_info)
                if tweet_created_time is None:
                    log.error("第%s张图片,解析tweet-created-at失败" % image_count)
                    continue

                # Reached images that were handled by a previous run
                if tweet_created_time <= save_data_image_time:
                    is_over = True
                    break

                # The first (newest) image's time becomes the new save record
                if first_image_time == 0:
                    first_image_time = tweet_created_time

                # Parse the tweeting account from the photo block
                account_id = get_tweet_account_id(photo_info)
                if account_id is None:
                    log.error("第%s张图片,解析tweet账号失败" % image_count)
                    continue

                # Look for the images
                img_tags = photo_info.findAll("img")
                for tag in img_tags:
                    tag_attr = dict(tag.attrs)
                    if robot.check_sub_key(("src", "alt"), tag_attr):
                        image_url = str(tag_attr["src"]).replace(" ", "")
                        # Skip duplicates caused by images added while paging
                        if image_url in seen_image_urls:
                            continue
                        seen_image_urls.add(image_url)

                        log.step("开始下载第%s张图片 %s" % (image_count, image_url))

                        file_type = image_url.split(".")[-1]
                        # No usable extension after the last dot: fall back to jpg
                        if file_type.find("/") != -1:
                            file_type = "jpg"
                        file_path = os.path.join(image_path, "%05d_%s.%s" % (image_count, account_id, file_type))
                        if tool.save_net_file(image_url, file_path):
                            log.step("第%s张图片下载成功" % image_count)
                            image_count += 1
                        else:
                            log.error("第%s张图片 %s,account_id:%s,下载失败" % (image_count, image_url, account_id))

            if not is_over:
                page_index += 1

        log.step("下载完毕")

        # Sort and copy into the save directories
        if self.is_sort:
            is_check_ok = False
            while not is_check_ok:
                # Wait until all images have been checked manually
                input_str = raw_input(tool.get_time() + " 已经下载完毕,是否下一步操作? (Y)es or (N)o: ")
                input_str = input_str.lower()
                if input_str in ["y", "yes"]:
                    is_check_ok = True
                elif input_str in ["n", "no"]:
                    tool.process_exit()

            all_path = os.path.join(self.image_download_path, "all")
            if not tool.make_dir(all_path, 0):
                log.error("创建目录 %s 失败" % all_path)
                tool.process_exit()

            file_list = tool.get_dir_files_name(self.image_temp_path, "desc")
            for file_name in file_list:
                source_path = os.path.join(self.image_temp_path, file_name)
                file_name_list = file_name.split(".")
                file_type = file_name_list[-1]
                # Temp files are named "<count>_<account_id>.<ext>"; drop the
                # leading count and keep the account id (may contain "_")
                account_id = "_".join(".".join(file_name_list[:-1]).split("_")[1:])

                # All images
                image_start_index += 1
                destination_file_name = "%05d_%s.%s" % (image_start_index, account_id, file_type)
                destination_path = os.path.join(all_path, destination_file_name)
                tool.copy_files(source_path, destination_path)

                # Per account
                each_account_path = os.path.join(self.image_download_path, "single", account_id)
                if not os.path.exists(each_account_path):
                    if not tool.make_dir(each_account_path, 0):
                        log.error("创建目录 %s 失败" % each_account_path)
                        tool.process_exit()
                if account_id in account_list:
                    account_list[account_id][1] = int(account_list[account_id][1]) + 1
                else:
                    account_list[account_id] = [account_id, 1]
                destination_file_name = "%05d.%s" % (account_list[account_id][1], file_type)
                destination_path = os.path.join(each_account_path, destination_file_name)
                tool.copy_files(source_path, destination_path)

            log.step("图片从下载目录移动到保存目录成功")

            # Remove the temporary directory
            tool.remove_dir(self.image_temp_path)

        # Nothing new was found in this run: keep the previously saved
        # timestamp instead of resetting it to 0, which would make the
        # next run download everything again
        if first_image_time == 0:
            first_image_time = save_data_image_time

        # Save the new save file
        temp_list = [account_list[key] for key in sorted(account_list.keys())]
        # Put the overall totals at the head of the list
        temp_list.insert(0, [ALL_SIGN, str(image_start_index), str(first_image_time)])
        tool.write_file(tool.list_to_string(temp_list), self.save_data_path, 2)

        log.step("全部下载完毕,耗时%s秒,共计图片%s张" % (self.get_run_time(), image_count - 1))
Exemplo n.º 9
0
def rewrite_save_file(temp_save_data_path, save_data_path):
    """
    Sort the records of the temporary save file by key, write them to
    the original save file, then remove the temporary file.
    """
    records = read_save_data(temp_save_data_path, 0, [])
    ordered_records = []
    for record_key in sorted(records.keys()):
        ordered_records.append(records[record_key])
    # Write mode 2 is presumably "replace"; confirm against tool.write_file
    tool.write_file(tool.list_to_string(ordered_records), save_data_path, 2)
    tool.remove_dir_or_file(temp_save_data_path)