def process_photo_task(update, context):
    """
    Run the task the user selected on the stored photo
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PHOTO_ID):
        return ConversationHandler.END

    _ = set_lang(update, context)
    user_data = context.user_data
    photo_id = user_data[PHOTO_ID]

    # Any reply other than the localized "beautify" label is processed with
    # is_beautify=False
    wants_beautify = update.effective_message.text == _(BEAUTIFY)
    process_photo(update, context, [photo_id], is_beautify=wants_beautify)

    # Drop the stored entry only if it still refers to the photo just processed
    if user_data[PHOTO_ID] == photo_id:
        del user_data[PHOTO_ID]

    return ConversationHandler.END
def add_ocr_to_pdf(update, context):
    """
    Add an OCR text layer to the stored PDF file and send back the result
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    message = update.effective_message
    message.reply_text(
        _("Adding an OCR text layer to your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    with tempfile.NamedTemporaryFile() as tmp_pdf:
        file_id, file_name = user_data[PDF_INFO]
        context.bot.get_file(file_id).download(custom_path=tmp_pdf.name)

        with tempfile.TemporaryDirectory() as work_dir:
            stem = os.path.splitext(file_name)[0]
            out_fn = os.path.join(work_dir, f"OCR_{stem}.pdf")

            try:
                # logging.getLogger("ocrmypdf").setLevel(logging.WARNING)
                ocrmypdf.ocr(tmp_pdf.name, out_fn, deskew=True, progress_bar=False)
                send_result_file(update, context, out_fn, "ocr")
            except PriorOcrFoundError:
                message.reply_text(_("Your PDF file already has a text layer"))

    # Clean up memory, but only if the stored entry is still the processed file
    if user_data[PDF_INFO] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def get_pdf_photos(update, context):
    """
    Extract the photos in the stored PDF file and send them to the user
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    message = update.effective_message
    message.reply_text(
        _("Extracting all the photos in your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    with tempfile.NamedTemporaryFile() as tmp_pdf:
        file_id, file_name = user_data[PDF_INFO]
        context.bot.get_file(file_id).download(custom_path=tmp_pdf.name)

        with tempfile.TemporaryDirectory() as work_dir:
            photos_dir = os.path.join(work_dir, "Photos_In_PDF")
            os.mkdir(photos_dir)

            if not write_photos_in_pdf(tmp_pdf.name, photos_dir, file_name):
                message.reply_text(_("Something went wrong, try again"))
            elif os.listdir(photos_dir):
                send_result_photos(update, context, photos_dir, "get_photos")
            else:
                # Extraction reported success but left the folder empty
                message.reply_text(
                    _("I couldn't find any photos in your PDF file"))

    # Clean up memory, but only if the stored entry is still the processed file
    if user_data[PDF_INFO] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def get_pdf_text(update, context, is_file):
    """
    Extract the text from the stored PDF file and hand it to send_pdf_text
    Args:
        update: the update object
        context: the context object
        is_file: forwarded unchanged to send_pdf_text (presumably selects
            whether the text is sent as a file; verify against send_pdf_text)

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Extracting text from your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        # The scratch file receiving the extracted text is context-managed so
        # its handle is closed even when extraction or decoding raises
        with tempfile.TemporaryFile() as tmp_text:
            with open(tf.name, "rb") as f:
                extract_text_to_fp(f, tmp_text)

            # Rewind before reading back what was just written
            tmp_text.seek(0)
            pdf_texts = textwrap.wrap(tmp_text.read().decode("utf-8").strip())

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"{os.path.splitext(file_name)[0]}.txt")
            send_pdf_text(update, context, pdf_texts, is_file, out_fn)

    # Clean up memory, but only if the stored entry is still the processed file
    if user_data[PDF_INFO] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def check_second_doc(update, context):
    """
    Validate the second document sent for comparison and start the comparison
    Args:
        update: the update object
        context: the context object

    Returns:
        WAIT_SECOND if check_pdf reports an invalid format, the result of
        compare_pdf if it reports PDF_OK, otherwise the variable indicating
        the conversation has ended
    """
    if not check_user_data(update, context, COMPARE_ID):
        return ConversationHandler.END

    status = check_pdf(update, context)

    # Wrong format: stay in the same state
    if status == PDF_INVALID_FORMAT:
        return WAIT_SECOND

    if status == PDF_OK:
        return compare_pdf(update, context)

    # Any other check_pdf result ends the conversation
    return ConversationHandler.END
def compress_pdf(update, context):
    """
    Compress the stored PDF file with the gs command and send back the result
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Compressing your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"Compressed_{os.path.splitext(file_name)[0]}.pdf")

            # Pass argv as a list instead of formatting a string and splitting
            # it: the output path embeds the user-supplied file name, and any
            # whitespace or quote in it would otherwise be split into extra
            # gs arguments
            cmd = [
                "gs",
                "-sDEVICE=pdfwrite",
                "-dCompatibilityLevel=1.4",
                "-dPDFSETTINGS=/default",
                "-dNOPAUSE",
                "-dQUIET",
                "-dBATCH",
                f"-sOutputFile={out_fn}",
                tf.name,
            ]
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False)
            out, err = proc.communicate()

            if proc.returncode != 0:
                log = Logger()
                log.error(
                    f'Stdout:\n{out.decode("utf-8")}\n\nStderr:\n{err.decode("utf-8")}'
                )
                update.effective_message.reply_text(
                    _("Something went wrong, try again"))
            else:
                old_size = os.path.getsize(tf.name)
                new_size = os.path.getsize(out_fn)

                # Translate the template first and format afterwards, so the
                # translation lookup sees the unformatted message
                update.effective_message.reply_text(
                    _("File size reduced by <b>{:.0%}</b>, "
                      "from <b>{}</b> to <b>{}</b>").format(
                        (1 - new_size / old_size),
                        humanize.naturalsize(old_size),
                        humanize.naturalsize(new_size),
                    ),
                    parse_mode=ParseMode.HTML,
                )
                send_result_file(update, context, out_fn, "compress")

    # Clean up memory, but only if the stored entry is still the processed file
    if user_data[PDF_INFO] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def check_wmk_doc(update, context):
    """
    Validate the watermark document and start adding the watermark
    Args:
        update: the update object
        context: the context object

    Returns:
        WAIT_WMK if check_pdf reports an invalid format, the result of
        add_wmk if it reports PDF_OK, otherwise the variable indicating the
        conversation has ended
    """
    if not check_user_data(update, context, WMK_ID):
        return ConversationHandler.END

    status = check_pdf(update, context)

    # Wrong format: stay in the same state
    if status == PDF_INVALID_FORMAT:
        return WAIT_WMK

    if status == PDF_OK:
        return add_wmk(update, context)

    # Any other check_pdf result ends the conversation
    return ConversationHandler.END
def rotate_pdf(update, context):
    """
    Rotate the stored PDF file by the number of degrees in the user's message
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended

    Raises:
        ValueError: if the message text cannot be parsed as an integer
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    message = update.effective_message

    # The message text itself carries the rotation angle
    angle = int(message.text)
    message.reply_text(
        _("Rotating your PDF file clockwise by {} degrees").format(angle),
        reply_markup=ReplyKeyboardRemove(),
    )
    process_pdf(update, context, "rotated", rotate_degree=angle)

    return ConversationHandler.END
def compress_pdf(update, context):
    """
    Compress the stored PDF file with the gs command and send back the result
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    # Imported here so this block does not depend on the module-level imports
    import shlex

    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Compressing your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"Compressed_{os.path.splitext(file_name)[0]}.pdf")

            # The output path embeds the user-supplied file name, so quote it
            # properly instead of wrapping it in literal double quotes (which
            # break on names containing quotes or backslashes).
            # NOTE(review): run_cmd is not visible here; this assumes it
            # tokenizes the string shell-style (shlex.split or a shell) - confirm
            command = ("gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 "
                       "-dPDFSETTINGS=/default -dNOPAUSE -dQUIET -dBATCH "
                       f"-sOutputFile={shlex.quote(out_fn)} "
                       f"{shlex.quote(tf.name)}")

            if run_cmd(command):
                old_size = os.path.getsize(tf.name)
                new_size = os.path.getsize(out_fn)

                # Translate the template first and format afterwards, so the
                # translation lookup sees the unformatted message
                update.effective_message.reply_text(
                    _("File size reduced by <b>{:.0%}</b>, "
                      "from <b>{}</b> to <b>{}</b>").format(
                        (1 - new_size / old_size),
                        humanize.naturalsize(old_size),
                        humanize.naturalsize(new_size),
                    ),
                    parse_mode=ParseMode.HTML,
                )
                send_result_file(update, context, out_fn, "compress")
            else:
                update.effective_message.reply_text(
                    _("Something went wrong, try again"))

    # Clean up memory, but only if the stored entry is still the processed file
    if user_data[PDF_INFO] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def check_back_user_data(update, context):
    """
    Handle a back action, otherwise check that the stored PDF data is valid
    Args:
        update: the update object
        context: the context object

    Returns:
        A state if it is a back action or the user data is invalid, else None
    """
    _ = set_lang(update, context)

    # The back action takes priority over the user data check
    if update.effective_message.text == _(BACK):
        return ask_doc_task(update, context)

    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    return None
def preprocess_merge_pdf(update, context):
    """
    Check that enough PDF files were collected before merging them
    Args:
        update: the update object
        context: the context object

    Returns:
        The result of ask_first_doc when no file is stored, of ask_next_doc
        when only one file is stored, of merge_pdf otherwise, or the variable
        indicating the conversation has ended if the user data check fails
    """
    if not check_user_data(update, context, MERGE_IDS):
        return ConversationHandler.END

    _ = set_lang(update, context)
    message = update.effective_message
    file_count = len(context.user_data[MERGE_IDS])

    if file_count == 0:
        message.reply_text(_("You haven't sent me any PDF files"))
        return ask_first_doc(update, context)

    if file_count == 1:
        message.reply_text(_("You've only sent me one PDF file."))
        return ask_next_doc(update, context)

    # Two or more files: go ahead and merge
    return merge_pdf(update, context)
def remove_doc(update, context):
    """
    Remove the most recently added PDF file from the list of files to merge
    Args:
        update: the update object
        context: the context object

    Returns:
        The result of ask_first_doc when no file is left (or none was stored),
        the result of ask_next_doc otherwise, or the variable indicating the
        conversation has ended if the user data check fails
    """
    if not check_user_data(update, context, MERGE_IDS):
        return ConversationHandler.END

    _ = set_lang(update, context)
    file_ids = context.user_data[MERGE_IDS]
    file_names = context.user_data[MERGE_NAMES]

    # The stored list may be empty even though the user data check passed;
    # popping from it would raise IndexError, so tell the user and ask again
    if not file_ids:
        update.effective_message.reply_text(
            _("You haven't sent me any PDF files"))
        return ask_first_doc(update, context)

    # Ids and names are popped together, so both lists drop their last entry
    file_ids.pop()
    file_name = file_names.pop()

    update.effective_message.reply_text(
        _("*{}* has been removed for merging").format(file_name),
        parse_mode=ParseMode.MARKDOWN,
    )

    if not file_ids:
        return ask_first_doc(update, context)

    return ask_next_doc(update, context)
def check_text(update: Update, context: CallbackContext) -> int:
    """
    Dispatch a text reply to the matching merge action
    Args:
        update: the update object
        context: the context object

    Returns:
        The state returned by the selected action, the variable indicating
        the conversation has ended if the user data check fails, or None when
        the text matches no known action
    """
    message = update.effective_message
    message.chat.send_action(ChatAction.TYPING)

    _ = set_lang(update, context)
    text = message.text

    if text in [_(REMOVE_LAST), _(DONE)]:
        # The per-user lock is looked up here and handed to the helpers
        lock = merge_locks[message.from_user.id]

        if not check_user_data(update, context, MERGE_IDS, lock):
            return ConversationHandler.END

        if text == _(REMOVE_LAST):
            return remove_doc(update, context, lock)

        # Not "remove last", so the text is the "done" label
        return preprocess_merge_pdf(update, context, lock)

    if text == _(CANCEL):
        return cancel(update, context)

    return None
def process_all_photos(update, context):
    """
    Run the task the user selected on all the stored photos
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PHOTO_IDS):
        return ConversationHandler.END

    _ = set_lang(update, context)
    user_data = context.user_data
    file_ids = user_data[PHOTO_IDS]
    file_names = user_data[PHOTO_NAMES]

    # Compare against the localized label: the reply text is in the user's
    # language, so matching the raw BEAUTIFY constant never selects the
    # beautify task for a translated label
    wants_beautify = update.effective_message.text == _(BEAUTIFY)
    process_photo(update, context, file_ids, is_beautify=wants_beautify)

    # Clean up memory, but only the entries that are still the processed ones
    if user_data[PHOTO_IDS] == file_ids:
        del user_data[PHOTO_IDS]
    if user_data[PHOTO_NAMES] == file_names:
        del user_data[PHOTO_NAMES]

    return ConversationHandler.END
def add_wmk(update, context):
    """
    Merge the first page of the watermark PDF onto every page of the stored
    PDF file and send back the result
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, WMK_ID):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Adding the watermark onto your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    src_file_id = user_data[WMK_ID]
    wmk_file_id = update.effective_message.document.file_id

    # Both temporary files are context-managed so they are closed even when
    # opening, merging or sending raises
    with tempfile.NamedTemporaryFile() as src_tf:
        with tempfile.NamedTemporaryFile() as wmk_tf:
            src_reader = open_pdf(update, context, src_file_id, src_tf.name)

            if src_reader is not None:
                wmk_reader = open_pdf(
                    update, context, wmk_file_id, wmk_tf.name, _("watermark"))

                if wmk_reader is not None:
                    # Add watermark
                    pdf_writer = PdfFileWriter()
                    for page in src_reader.pages:
                        page.mergePage(wmk_reader.getPage(0))
                        pdf_writer.addPage(page)

                    # Send result file while the temp files are still open
                    write_send_pdf(
                        update, context, pdf_writer, "file.pdf", "watermarked")

    # Clean up memory, but only if the stored entry is still the processed file
    if user_data[WMK_ID] == src_file_id:
        del user_data[WMK_ID]

    return ConversationHandler.END
def pdf_to_photos(update, context):
    """
    Convert the stored PDF file into PNG photos and send them to the user
    Args:
        update: the update object
        context: the context object

    Returns:
        The variable indicating the conversation has ended
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Converting your PDF file into photos"),
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    with tempfile.NamedTemporaryFile() as tmp_pdf:
        file_id, file_name = user_data[PDF_INFO]
        context.bot.get_file(file_id).download(custom_path=tmp_pdf.name)

        with tempfile.TemporaryDirectory() as work_dir:
            # The photos go into a named sub-folder of the scratch directory
            photos_dir = os.path.join(work_dir, "PDF_Photos")
            os.mkdir(photos_dir)

            # Output files are named after the PDF file, minus its extension
            pdf2image.convert_from_path(
                tmp_pdf.name,
                output_folder=photos_dir,
                output_file=os.path.splitext(file_name)[0],
                fmt="png",
            )

            send_result_photos(update, context, photos_dir, "to_photos")

    # Clean up memory, but only if the stored entry is still the processed file
    if user_data[PDF_INFO] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def text_to_pdf(update: Update, context: CallbackContext, font_family: str,
                font_url: str):
    """
    Render the stored text into a PDF file and send it to the user
    Args:
        update: the update object
        context: the context object
        font_family: the font family name; a custom stylesheet is built only
            when it differs from DEFAULT_FONT
        font_url: the URL the custom font is loaded from

    Returns:
        The variable indicating the conversation has ended
    """
    # Imported locally so this block brings its own dependency into scope
    from html import escape

    if not check_user_data(update, context, TEXT):
        return ConversationHandler.END

    _ = set_lang(update, context)
    text = context.user_data[TEXT]
    update.effective_message.reply_text(
        _("Creating your PDF file"), reply_markup=ReplyKeyboardRemove())

    # The text comes from the user: escape it before embedding it in markup so
    # characters such as "<" and "&" are rendered literally instead of being
    # parsed as HTML. Newlines are turned into <br/> after escaping so the
    # inserted tags stay intact
    safe_text = escape(text, quote=False).replace("\n", "<br/>")
    html_doc = HTML(string="<p>{}</p>".format(safe_text))

    font_config = FontConfiguration()
    stylesheets: List[CSS] = None

    if font_family != DEFAULT_FONT:
        stylesheets = [
            CSS(
                string=("@font-face {"
                        f"font-family: {font_family};"
                        f"src: url({font_url});"
                        "}"
                        "p {"
                        f"font-family: {font_family};"
                        "}"),
                font_config=font_config,
            )
        ]

    with tempfile.TemporaryDirectory() as dir_name:
        out_fn = os.path.join(dir_name, "Text.pdf")
        html_doc.write_pdf(
            out_fn, stylesheets=stylesheets, font_config=font_config)
        send_result_file(update, context, out_fn, "text")

    return ConversationHandler.END
def check_text(update: Update, context: CallbackContext) -> int:
    """
    Dispatch a text reply to the matching photo action
    Args:
        update: the update object
        context: the context object

    Returns:
        The state returned by the selected action, or the variable indicating
        the conversation has ended when the user data check fails or the text
        matches no known action
    """
    message = update.effective_message
    message.chat.send_action(ChatAction.TYPING)

    _ = set_lang(update, context)
    text = message.text
    result = ConversationHandler.END

    if text in [_(REMOVE_LAST), _(BEAUTIFY), _(TO_PDF)]:
        user_id = message.from_user.id
        lock = photo_locks[user_id]
        lock.acquire()

        # Release in a finally block: an exception raised while the lock is
        # held would otherwise leave this user's lock acquired forever
        try:
            if not check_user_data(update, context, PHOTO_IDS):
                result = ConversationHandler.END
            elif text == _(REMOVE_LAST):
                result = remove_photo(update, context)
            else:
                # Not "remove last", so the text is the beautify or
                # to-PDF label
                result = process_all_photos(update, context)
        finally:
            lock.release()
    elif text == _(CANCEL):
        result = cancel(update, context)

    return result