示例#1
0
def process_photo_task(update, context):
    """
    Run the task chosen by the user on the photo stored in the user data
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if check_user_data(update, context, PHOTO_ID):
        _ = set_lang(update, context)
        stored = context.user_data
        photo_id = stored[PHOTO_ID]

        # Any reply other than the localized "beautify" label selects the
        # non-beautify processing
        wants_beautify = update.effective_message.text == _(BEAUTIFY)
        process_photo(update, context, [photo_id], is_beautify=wants_beautify)

        # Drop the entry only if it still refers to the photo just processed
        if stored[PHOTO_ID] == photo_id:
            del stored[PHOTO_ID]

    return ConversationHandler.END
示例#2
0
def add_ocr_to_pdf(update, context):
    """
    Add an OCR text layer to the PDF file stored in the user data and send
    the result back to the user
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Adding an OCR text layer to your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    # Keep the whole stored entry so the cleanup below can compare like with
    # like (the entry unpacks into a file ID and a file name)
    pdf_info = user_data[PDF_INFO]
    file_id, file_name = pdf_info

    with tempfile.NamedTemporaryFile() as tf:
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(dir_name,
                                  f"OCR_{os.path.splitext(file_name)[0]}.pdf")
            try:
                ocrmypdf.ocr(tf.name, out_fn, deskew=True, progress_bar=False)
                send_result_file(update, context, out_fn, "ocr")
            except PriorOcrFoundError:
                update.effective_message.reply_text(
                    _("Your PDF file already has a text layer"))

    # Clean up memory, but only if the entry is still the one processed here.
    # Comparing the entry against the file ID alone never matched, so the
    # entry was never removed.
    if user_data.get(PDF_INFO) == pdf_info:
        del user_data[PDF_INFO]

    return ConversationHandler.END
示例#3
0
def get_pdf_photos(update, context):
    """
    Extract the photos from the PDF file stored in the user data and send
    them back to the user
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Extracting all the photos in your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    # Keep the whole stored entry so the cleanup below can compare like with
    # like (the entry unpacks into a file ID and a file name)
    pdf_info = user_data[PDF_INFO]
    file_id, file_name = pdf_info

    with tempfile.NamedTemporaryFile() as tf:
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as tmp_dir_name:
            dir_name = os.path.join(tmp_dir_name, "Photos_In_PDF")
            os.mkdir(dir_name)

            if not write_photos_in_pdf(tf.name, dir_name, file_name):
                update.effective_message.reply_text(
                    _("Something went wrong, try again"))
            elif not os.listdir(dir_name):
                # Extraction reported success but produced no files
                update.effective_message.reply_text(
                    _("I couldn't find any photos in your PDF file"))
            else:
                send_result_photos(update, context, dir_name, "get_photos")

    # Clean up memory, but only if the entry is still the one processed here.
    # Comparing the entry against the file ID alone never matched, so the
    # entry was never removed.
    if user_data.get(PDF_INFO) == pdf_info:
        del user_data[PDF_INFO]

    return ConversationHandler.END
示例#4
0
文件: text.py 项目: kmacprt/pdfbot
def get_pdf_text(update, context, is_file):
    """
    Extract the text from the PDF file stored in the user data and send it
    to the user
    Args:
        update: the update object
        context: the context object
        is_file: passed through to send_pdf_text together with the output
            file name

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Extracting text from your PDF file"), reply_markup=ReplyKeyboardRemove()
    )

    user_data = context.user_data
    # Keep the whole stored entry so the cleanup below can compare like with
    # like (the entry unpacks into a file ID and a file name)
    pdf_info = user_data[PDF_INFO]
    file_id, file_name = pdf_info

    with tempfile.NamedTemporaryFile() as tf:
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        # The scratch file for the extracted text is now closed
        # deterministically instead of being left open
        with tempfile.TemporaryFile() as tmp_text:
            with open(tf.name, "rb") as f:
                extract_text_to_fp(f, tmp_text)

            tmp_text.seek(0)
            pdf_texts = textwrap.wrap(tmp_text.read().decode("utf-8").strip())

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(dir_name, f"{os.path.splitext(file_name)[0]}.txt")
            send_pdf_text(update, context, pdf_texts, is_file, out_fn)

    # Clean up memory, but only if the entry is still the one processed here.
    # Comparing the entry against the file ID alone never matched, so the
    # entry was never removed.
    if user_data.get(PDF_INFO) == pdf_info:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def check_second_doc(update, context):
    """
    Validate the second document and, if it is fine, compare the PDF files
    Args:
        update: the update object
        context: the context object

    Returns:
        WAIT_SECOND if check_pdf reports an invalid format, the result of
        compare_pdf if it reports PDF_OK, otherwise the variable indicating
        the conversation has ended
    """
    if check_user_data(update, context, COMPARE_ID):
        outcome = check_pdf(update, context)

        if outcome == PDF_INVALID_FORMAT:
            return WAIT_SECOND
        if outcome == PDF_OK:
            return compare_pdf(update, context)

    return ConversationHandler.END
示例#6
0
def compress_pdf(update, context):
    """
    Compress the PDF file stored in the user data with the gs command and
    send the result back to the user
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Compressing your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    # Keep the whole stored entry so the cleanup below can compare like with
    # like (the entry unpacks into a file ID and a file name)
    pdf_info = user_data[PDF_INFO]
    file_id, file_name = pdf_info

    with tempfile.NamedTemporaryFile() as tf:
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"Compressed_{os.path.splitext(file_name)[0]}.pdf")

            # Pass the arguments as a list: the output path contains the
            # user-supplied file name, and splitting a formatted string would
            # break on spaces or quotes in it
            cmd = [
                "gs",
                "-sDEVICE=pdfwrite",
                "-dCompatibilityLevel=1.4",
                "-dPDFSETTINGS=/default",
                "-dNOPAUSE",
                "-dQUIET",
                "-dBATCH",
                f"-sOutputFile={out_fn}",
                tf.name,
            ]
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False)
            out, err = proc.communicate()

            if proc.returncode != 0:
                log = Logger()
                log.error(
                    f'Stdout:\n{out.decode("utf-8")}\n\nStderr:\n{err.decode("utf-8")}'
                )
                update.effective_message.reply_text(
                    _("Something went wrong, try again"))
            else:
                old_size = os.path.getsize(tf.name)
                new_size = os.path.getsize(out_fn)
                # Translate the template first and format afterwards;
                # formatting first makes the translation lookup miss
                update.effective_message.reply_text(
                    _("File size reduced by <b>{:.0%}</b>, "
                      "from <b>{}</b> to <b>{}</b>").format(
                          (1 - new_size / old_size),
                          humanize.naturalsize(old_size),
                          humanize.naturalsize(new_size),
                      ),
                    parse_mode=ParseMode.HTML,
                )
                send_result_file(update, context, out_fn, "compress")

    # Clean up memory, but only if the entry is still the one processed here.
    # Comparing the entry against the file ID alone never matched, so the
    # entry was never removed.
    if user_data.get(PDF_INFO) == pdf_info:
        del user_data[PDF_INFO]

    return ConversationHandler.END
示例#7
0
def check_wmk_doc(update, context):
    """
    Validate the watermark document and, if it is fine, add the watermark
    Args:
        update: the update object
        context: the context object

    Returns:
        WAIT_WMK if check_pdf reports an invalid format, the result of
        add_wmk if it reports PDF_OK, otherwise the variable indicating the
        conversation has ended
    """
    if check_user_data(update, context, WMK_ID):
        outcome = check_pdf(update, context)

        if outcome == PDF_INVALID_FORMAT:
            return WAIT_WMK
        if outcome == PDF_OK:
            return add_wmk(update, context)

    return ConversationHandler.END
示例#8
0
def rotate_pdf(update, context):
    """
    Rotate the PDF file by the number of degrees given in the message text
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if check_user_data(update, context, PDF_INFO):
        _ = set_lang(update, context)
        message = update.effective_message

        # The message text is parsed as the rotation angle
        angle = int(message.text)
        notice = _("Rotating your PDF file clockwise by {} degrees").format(angle)
        message.reply_text(notice, reply_markup=ReplyKeyboardRemove())

        process_pdf(update, context, "rotated", rotate_degree=angle)

    return ConversationHandler.END
def compress_pdf(update, context):
    """
    Compress the PDF file stored in the user data with the gs command and
    send the result back to the user
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Compressing your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    # Keep the whole stored entry so the cleanup below can compare like with
    # like (the entry unpacks into a file ID and a file name)
    pdf_info = user_data[PDF_INFO]
    file_id, file_name = pdf_info

    with tempfile.NamedTemporaryFile() as tf:
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"Compressed_{os.path.splitext(file_name)[0]}.pdf")
            # NOTE(review): the paths are wrapped in double quotes; a file
            # name containing a double quote would presumably break the
            # command -- confirm how run_cmd parses it
            command = ("gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 "
                       "-dPDFSETTINGS=/default -dNOPAUSE -dQUIET -dBATCH "
                       f'-sOutputFile="{out_fn}" "{tf.name}"')

            if run_cmd(command):
                old_size = os.path.getsize(tf.name)
                new_size = os.path.getsize(out_fn)
                # Translate the template first and format afterwards;
                # formatting first makes the translation lookup miss
                update.effective_message.reply_text(
                    _("File size reduced by <b>{:.0%}</b>, "
                      "from <b>{}</b> to <b>{}</b>").format(
                          (1 - new_size / old_size),
                          humanize.naturalsize(old_size),
                          humanize.naturalsize(new_size),
                      ),
                    parse_mode=ParseMode.HTML,
                )
                send_result_file(update, context, out_fn, "compress")
            else:
                update.effective_message.reply_text(
                    _("Something went wrong, try again"))

    # Clean up memory, but only if the entry is still the one processed here.
    # Comparing the entry against the file ID alone never matched, so the
    # entry was never removed.
    if user_data.get(PDF_INFO) == pdf_info:
        del user_data[PDF_INFO]

    return ConversationHandler.END
示例#10
0
文件: utils.py 项目: kmacprt/pdfbot
def check_back_user_data(update, context):
    """
    Handle the back action and validate the user data
    Args:
        update: the update object
        context: the context object

    Returns:
        A state if it is a back action or the user data is invalid, else None
    """
    _ = set_lang(update, context)

    # The back action takes precedence over the user data check
    if update.effective_message.text == _(BACK):
        return ask_doc_task(update, context)

    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    return None
示例#11
0
def preprocess_merge_pdf(update, context):
    """
    Check that enough PDF files have been collected before merging them
    Args:
        update: the update object
        context: the context object

    Returns:
        The result of merge_pdf when there are at least two files, the result
        of ask_next_doc when there is one, the result of ask_first_doc when
        there are none, or the variable indicating the conversation has ended
        if the user data is invalid
    """
    if not check_user_data(update, context, MERGE_IDS):
        return ConversationHandler.END

    _ = set_lang(update, context)
    collected = len(context.user_data[MERGE_IDS])

    # Enough files: go straight to merging
    if collected > 1:
        return merge_pdf(update, context)

    if collected == 1:
        update.effective_message.reply_text(
            _("You've only sent me one PDF file."))
        return ask_next_doc(update, context)

    update.effective_message.reply_text(
        _("You haven't sent me any PDF files"))
    return ask_first_doc(update, context)
示例#12
0
def remove_doc(update, context):
    """
    Remove the most recently added PDF file from the list of files to merge
    Args:
        update: the update object
        context: the context object

    Returns:
        The result of ask_first_doc if no files are left, the result of
        ask_next_doc otherwise, or the variable indicating the conversation
        has ended if the user data is invalid
    """
    if not check_user_data(update, context, MERGE_IDS):
        return ConversationHandler.END

    _ = set_lang(update, context)
    file_ids = context.user_data[MERGE_IDS]
    file_names = context.user_data[MERGE_NAMES]

    # Nothing to remove: popping from an empty list would raise IndexError
    if not file_ids or not file_names:
        update.effective_message.reply_text(
            _("You haven't sent me any PDF files"))
        return ask_first_doc(update, context)

    file_ids.pop()
    file_name = file_names.pop()

    update.effective_message.reply_text(
        _("*{}* has been removed for merging").format(file_name),
        parse_mode=ParseMode.MARKDOWN,
    )

    if not file_ids:
        return ask_first_doc(update, context)

    return ask_next_doc(update, context)
示例#13
0
def check_text(update: Update, context: CallbackContext) -> int:
    """
    Dispatch a text message received while collecting PDF files to merge
    Args:
        update: the update object
        context: the context object

    Returns:
        The result of the matching handler, the variable indicating the
        conversation has ended if the user data is invalid, or None if the
        text matches none of the known actions
    """
    message = update.effective_message
    message.chat.send_action(ChatAction.TYPING)
    _ = set_lang(update, context)
    text = message.text
    remove_last_label, done_label = _(REMOVE_LAST), _(DONE)

    if text in (remove_last_label, done_label):
        # The per-user lock is handed to the helpers that touch the merge data
        lock = merge_locks[message.from_user.id]

        if not check_user_data(update, context, MERGE_IDS, lock):
            return ConversationHandler.END

        if text == remove_last_label:
            return remove_doc(update, context, lock)
        if text == done_label:
            return preprocess_merge_pdf(update, context, lock)
    elif text == _(CANCEL):
        return cancel(update, context)

    return None
示例#14
0
def process_all_photos(update, context):
    """
    Run the task chosen by the user on all the photos stored in the user data
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PHOTO_IDS):
        return ConversationHandler.END

    _ = set_lang(update, context)
    user_data = context.user_data
    file_ids = user_data[PHOTO_IDS]
    file_names = user_data[PHOTO_NAMES]

    # Compare against the localized label: the callers match the message text
    # against _(BEAUTIFY), so the raw constant only matches when the label is
    # not translated
    is_beautify = update.effective_message.text == _(BEAUTIFY)
    process_photo(update, context, file_ids, is_beautify=is_beautify)

    # Clean up memory
    if user_data[PHOTO_IDS] == file_ids:
        del user_data[PHOTO_IDS]
    if user_data[PHOTO_NAMES] == file_names:
        del user_data[PHOTO_NAMES]

    return ConversationHandler.END
示例#15
0
def add_wmk(update, context):
    """
    Merge the first page of the watermark PDF file onto every page of the
    source PDF file and send the result back to the user
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, WMK_ID):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Adding the watermark onto your PDF file"),
        reply_markup=ReplyKeyboardRemove())

    user_data = context.user_data
    src_file_id = user_data[WMK_ID]
    wmk_file_id = update.effective_message.document.file_id

    # The context managers close the temporary files even if opening, merging
    # or sending raises
    with tempfile.NamedTemporaryFile() as src_tf, \
            tempfile.NamedTemporaryFile() as wmk_tf:
        src_reader = open_pdf(update, context, src_file_id, src_tf.name)

        if src_reader is not None:
            wmk_reader = open_pdf(update, context, wmk_file_id, wmk_tf.name,
                                  _("watermark"))
            if wmk_reader is not None:
                # Add watermark
                pdf_writer = PdfFileWriter()
                for page in src_reader.pages:
                    page.mergePage(wmk_reader.getPage(0))
                    pdf_writer.addPage(page)

                # Send result file
                write_send_pdf(update, context, pdf_writer, "file.pdf",
                               "watermarked")

    # Clean up memory
    if user_data[WMK_ID] == src_file_id:
        del user_data[WMK_ID]

    return ConversationHandler.END
示例#16
0
def pdf_to_photos(update, context):
    """
    Convert the PDF file stored in the user data into PNG photos and send
    them back to the user
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Converting your PDF file into photos"),
        reply_markup=ReplyKeyboardRemove())

    user_data = context.user_data
    # Keep the whole stored entry so the cleanup below can compare like with
    # like (the entry unpacks into a file ID and a file name)
    pdf_info = user_data[PDF_INFO]
    file_id, file_name = pdf_info

    with tempfile.NamedTemporaryFile() as tf:
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as tmp_dir_name:
            # Setup the directory for the photos
            dir_name = os.path.join(tmp_dir_name, "PDF_Photos")
            os.mkdir(dir_name)

            # Convert the PDF file into photos
            pdf2image.convert_from_path(
                tf.name,
                output_folder=dir_name,
                output_file=os.path.splitext(file_name)[0],
                fmt="png",
            )

            # Handle the result photos
            send_result_photos(update, context, dir_name, "to_photos")

    # Clean up memory, but only if the entry is still the one processed here.
    # Comparing the entry against the file ID alone never matched, so the
    # entry was never removed.
    if user_data.get(PDF_INFO) == pdf_info:
        del user_data[PDF_INFO]

    return ConversationHandler.END
示例#17
0
def text_to_pdf(update: Update, context: CallbackContext, font_family: str,
                font_url: str):
    """
    Render the text stored in the user data into a PDF file and send it to
    the user
    Args:
        update: the update object
        context: the context object
        font_family: the font family name; a custom stylesheet is only built
            when it differs from DEFAULT_FONT
        font_url: the URL used as the font source in the custom stylesheet

    Returns:
        The variable indicating the conversation has ended
    """
    # Local import so the block does not depend on a file-level import
    from html import escape

    if not check_user_data(update, context, TEXT):
        return ConversationHandler.END

    _ = set_lang(update, context)
    text = context.user_data[TEXT]
    update.effective_message.reply_text(_("Creating your PDF file"),
                                        reply_markup=ReplyKeyboardRemove())

    # The text comes from the user: escape it before embedding it in markup
    # so that "<", ">" and "&" are rendered literally instead of being parsed
    # as HTML. Line breaks are converted after escaping.
    body = escape(text).replace("\n", "<br/>")
    document = HTML(string="<p>{}</p>".format(body))
    font_config = FontConfiguration()
    stylesheets: List[CSS] = None

    if font_family != DEFAULT_FONT:
        stylesheets = [
            CSS(
                string=("@font-face {"
                        f"font-family: {font_family};"
                        f"src: url({font_url});"
                        "}"
                        "p {"
                        f"font-family: {font_family};"
                        "}"),
                font_config=font_config,
            )
        ]

    with tempfile.TemporaryDirectory() as dir_name:
        out_fn = os.path.join(dir_name, "Text.pdf")
        document.write_pdf(out_fn,
                           stylesheets=stylesheets,
                           font_config=font_config)
        send_result_file(update, context, out_fn, "text")

    return ConversationHandler.END
示例#18
0
def check_text(update: Update, context: CallbackContext) -> int:
    """
    Dispatch a text message received while collecting photos
    Args:
        update: the update object
        context: the context object

    Returns:
        The result of the matching handler, or the variable indicating the
        conversation has ended if the text matches no action or the user data
        is invalid
    """
    message = update.effective_message
    message.chat.send_action(ChatAction.TYPING)
    text = message.text
    result = ConversationHandler.END
    _ = set_lang(update, context)

    if text in [_(REMOVE_LAST), _(BEAUTIFY), _(TO_PDF)]:
        lock = photo_locks[message.from_user.id]
        lock.acquire()

        # Release the lock even if a handler raises; otherwise every later
        # message from this user would block on acquire()
        try:
            if check_user_data(update, context, PHOTO_IDS):
                if text == _(REMOVE_LAST):
                    result = remove_photo(update, context)
                elif text in [_(BEAUTIFY), _(TO_PDF)]:
                    result = process_all_photos(update, context)
        finally:
            lock.release()
    elif text == _(CANCEL):
        result = cancel(update, context)

    return result