def crop_pdf(update, context, percent=None, offset=None):
    """Crop the margins of the user's stored PDF with ``pdf-crop-margins``.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file, cropped into a temporary directory, and the output path is
    handed to ``send_result_file``. On command failure the user is told to
    try again.

    Args:
        update: Incoming update; replies go to its ``effective_message``.
        context: Callback context providing ``bot`` and ``user_data``.
        percent: Value for the ``-p`` option; used whenever it is not None.
        offset: Value for the ``-a`` option; used only when ``percent`` is None.

    Returns:
        ``ConversationHandler.END``.
    """
    # Function-scope import so the block does not depend on the module header.
    import shlex

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Cropping your PDF file"), reply_markup=ReplyKeyboardRemove()
    )

    with tempfile.NamedTemporaryFile(suffix=".pdf") as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(dir_name, f"Cropped_{file_name}")

            # file_name comes from the uploaded document, so quote the paths
            # properly instead of wrapping them in bare double quotes (a name
            # containing '"' used to corrupt the command line).
            # NOTE(review): assumes run_cmd tokenises the string with
            # shell-like rules (shlex.split or a POSIX shell) - confirm.
            command = (
                f"pdf-crop-margins -o {shlex.quote(out_fn)} {shlex.quote(tf.name)}"
            )
            if percent is not None:
                command += f" -p {percent}"
            else:
                command += f" -a {offset}"

            if run_cmd(command):
                send_result_file(update, context, out_fn, "crop")
            else:
                update.effective_message.reply_text(
                    _("Something went wrong, try again")
                )

    # Clean up memory. PDF_INFO stores a (file_id, file_name) pair, so compare
    # its first element: comparing the whole pair with file_id never matched
    # and the entry was never removed.
    if PDF_INFO in user_data and user_data[PDF_INFO][0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def compare_pdf(update, context):
    """Diff the previously stored PDF against the one attached to this message.

    The first file id is read from ``context.user_data[COMPARE_ID]``; the
    second file is the document on the current message. Both are downloaded to
    temporary files and passed to ``pdf_diff.main``, which writes
    ``Differences.png``; that path is handed to ``send_result_file``. When
    ``NoDifferenceError`` is raised the user is told there are no differences.

    Returns:
        ``ConversationHandler.END``.
    """
    _ = set_lang(update, context)
    msg = update.effective_message
    msg.reply_text(_("Comparing your PDF files"), reply_markup=ReplyKeyboardRemove())

    with tempfile.NamedTemporaryFile() as first_tmp:
        with tempfile.NamedTemporaryFile() as second_tmp:
            # Fetch both PDFs onto disk.
            user_data = context.user_data
            first_file_id = user_data[COMPARE_ID]
            context.bot.get_file(first_file_id).download(custom_path=first_tmp.name)
            msg.document.get_file().download(custom_path=second_tmp.name)

            try:
                with tempfile.TemporaryDirectory() as work_dir:
                    diff_path = os.path.join(work_dir, "Differences.png")
                    pdf_diff.main(
                        files=[first_tmp.name, second_tmp.name],
                        out_file=diff_path,
                    )
                    send_result_file(update, context, diff_path, "compare")
            except NoDifferenceError:
                msg.reply_text(
                    _("There are no differences in text between your PDF files")
                )

    # Forget the stored id only if it is still the one that was just compared.
    if user_data[COMPARE_ID] == first_file_id:
        del user_data[COMPARE_ID]

    return ConversationHandler.END
def url_to_pdf(update: Update, context: CallbackContext):
    """Render the web page whose URL is the message text into a PDF.

    URLs currently being converted are tracked in the set stored at
    ``context.user_data[URLS]`` so that a repeated request for the same page
    is answered with a notice instead of starting a second conversion.

    Args:
        update: Incoming update; the URL is ``effective_message.text``.
        context: Callback context providing ``user_data``.
    """
    _ = set_lang(update, context)
    message = update.effective_message
    url = message.text
    user_data = context.user_data

    # The original guarded against user_data being None only in the first
    # test and then crashed in the else-branch; skip tracking in that case.
    can_track = user_data is not None

    if can_track and URLS in user_data and url in user_data[URLS]:
        message.reply_text(
            _("You've sent me this web page already and I'm still converting it")
        )
        return

    message.reply_text(_("Converting your web page into a PDF file"))
    if can_track:
        if URLS in user_data:
            user_data[URLS].add(url)
        else:
            user_data[URLS] = {url}

    try:
        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(dir_name, f"{urlparse(url).netloc}.pdf")
            try:
                HTML(url=url).write_pdf(out_fn)
                send_result_file(update, context, out_fn, "url")
            except URLFetchingError:
                message.reply_text(_("Unable to reach your web page"))
    finally:
        # Always release the in-progress marker: previously any exception
        # other than URLFetchingError left the URL in the set for good, so the
        # user could never convert that page again. discard() also tolerates
        # the entry having been removed already.
        if can_track and URLS in user_data:
            user_data[URLS].discard(url)
def send_result_photos(update, context, dir_name, task):
    """Send the photos in ``dir_name`` one by one, or as a single zip archive.

    When the message text equals the translated ``PHOTOS`` label, every file
    in ``dir_name`` (in sorted name order) whose size does not exceed
    ``MAX_FILESIZE_UPLOAD`` is sent as a photo, falling back to a document
    when ``BadRequest`` is raised; larger files are skipped. Otherwise the
    directory is zipped and the archive path is handed to ``send_result_file``.

    Args:
        update: Incoming update; replies go to its ``effective_message``.
        context: Callback context forwarded to the helpers.
        dir_name: Directory containing the photos.
        task: Task label forwarded to ``update_stats`` / ``send_result_file``.
    """
    _ = set_lang(update, context)
    message = update.effective_message

    if message.text == _(PHOTOS):
        for photo_name in sorted(os.listdir(dir_name)):
            photo_path = os.path.join(dir_name, photo_name)
            if os.path.getsize(photo_path) > MAX_FILESIZE_UPLOAD:
                continue

            # Open the file in a context manager for each attempt so the
            # handle is always closed (it used to be leaked), and so the
            # document fallback reads from the start of the file.
            try:
                message.chat.send_action(ChatAction.UPLOAD_PHOTO)
                with open(photo_path, "rb") as photo:
                    message.reply_photo(photo)
            except BadRequest:
                message.chat.send_action(ChatAction.UPLOAD_DOCUMENT)
                with open(photo_path, "rb") as photo:
                    message.reply_document(photo)

        message.reply_text(
            _("See above for all your photos"),
            reply_markup=get_support_markup(update, context),
        )
        update_stats(update, task)
    else:
        # Compress the directory of photos
        shutil.make_archive(dir_name, "zip", dir_name)

        # Send result file
        send_result_file(update, context, f"{dir_name}.zip", task)
def add_ocr_to_pdf(update, context):
    """Run ``ocrmypdf`` on the user's stored PDF to add an OCR text layer.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file, processed into ``OCR_<name>.pdf`` inside a temporary
    directory, and the output path is handed to ``send_result_file``. When
    ``PriorOcrFoundError`` is raised the user is told the file already has a
    text layer.

    Returns:
        ``ConversationHandler.END`` (also returned immediately when
        ``check_user_data`` reports a falsy result).
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Adding an OCR text layer to your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"OCR_{os.path.splitext(file_name)[0]}.pdf"
            )
            try:
                ocrmypdf.ocr(tf.name, out_fn, deskew=True, progress_bar=False)
                send_result_file(update, context, out_fn, "ocr")
            except PriorOcrFoundError:
                update.effective_message.reply_text(
                    _("Your PDF file already has a text layer")
                )

    # Clean up memory. PDF_INFO stores a (file_id, file_name) pair, so compare
    # its first element: comparing the whole pair with file_id never matched
    # and the entry was never removed.
    if PDF_INFO in user_data and user_data[PDF_INFO][0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def _pack_pdf_texts(pdf_texts, limit):
    """Join texts with spaces into stripped messages of at most ``limit`` chars."""
    buffers = []
    current = ""
    for pdf_text in pdf_texts:
        # Start a new message when this text (plus its separating space)
        # would push the current one over the limit.
        if len(current) + len(pdf_text) + 1 > limit:
            buffers.append(current)
            current = ""
        current += f" {pdf_text}"
    buffers.append(current)

    messages = []
    for buffer in buffers:
        stripped = buffer.strip()
        # A single text longer than the limit cannot be packed, so it is cut
        # into limit-sized pieces; empty pieces are dropped because an empty
        # message cannot be sent.
        for start in range(0, len(stripped), limit):
            piece = stripped[start:start + limit].strip()
            if piece:
                messages.append(piece)
    return messages


def send_pdf_text(update, context, pdf_texts, is_file, out_fn):
    """Deliver text extracted from a PDF as a file or as chat messages.

    Args:
        update: Incoming update; replies go to its ``effective_message``.
        context: Callback context forwarded to ``send_result_file``.
        pdf_texts: Sequence of text strings; when empty the user is told that
            no text was found.
        is_file: When true, the texts are written newline-separated to
            ``out_fn`` and that path is handed to ``send_result_file``;
            otherwise they are sent as messages of at most
            ``MAX_MESSAGE_LENGTH`` characters.
        out_fn: Path of the text file to write when ``is_file`` is true.
    """
    _ = set_lang(update, context)
    message = update.effective_message

    if not pdf_texts:
        message.reply_text(_("I couldn't find any text in your PDF file"))
        return

    if is_file:
        # Explicit UTF-8 so extracted non-ASCII text does not depend on the
        # process locale.
        with open(out_fn, "w", encoding="utf-8") as f:
            f.write("\n".join(pdf_texts))
        send_result_file(update, context, out_fn, "get_text")
    else:
        # Previously a text longer than the limit triggered an empty reply
        # followed by an over-long one; the helper skips empty chunks and
        # splits oversized ones.
        for chunk in _pack_pdf_texts(pdf_texts, MAX_MESSAGE_LENGTH):
            message.reply_text(chunk)

        message.reply_text(
            _("*See above for all the text in your PDF file*"),
            parse_mode=ParseMode.MARKDOWN,
        )
def compress_pdf(update, context):
    """Compress the user's stored PDF with Ghostscript (``gs``).

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file and rewritten by ``gs`` into ``Compressed_<name>.pdf``
    inside a temporary directory. On success the size reduction is reported
    and the output path is handed to ``send_result_file``; on a non-zero exit
    code stdout/stderr are logged and the user is told to try again.

    Returns:
        ``ConversationHandler.END`` (also returned immediately when
        ``check_user_data`` reports a falsy result).
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Compressing your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"Compressed_{os.path.splitext(file_name)[0]}.pdf"
            )

            # Pass an argument list instead of splitting a formatted string:
            # out_fn contains the user's file name, and a name with spaces or
            # quotes used to be split into several arguments.
            gs_args = [
                "gs",
                "-sDEVICE=pdfwrite",
                "-dCompatibilityLevel=1.4",
                "-dPDFSETTINGS=/default",
                "-dNOPAUSE",
                "-dQUIET",
                "-dBATCH",
                f"-sOutputFile={out_fn}",
                tf.name,
            ]
            proc = Popen(gs_args, stdout=PIPE, stderr=PIPE, shell=False)
            out, err = proc.communicate()

            if proc.returncode != 0:
                log = Logger()
                log.error(
                    f'Stdout:\n{out.decode("utf-8")}\n\nStderr:\n{err.decode("utf-8")}'
                )
                update.effective_message.reply_text(
                    _("Something went wrong, try again")
                )
            else:
                old_size = os.path.getsize(tf.name)
                new_size = os.path.getsize(out_fn)
                # Guard against an empty download to avoid ZeroDivisionError.
                reduction = 1 - new_size / old_size if old_size else 0.0

                # Translate the template first, then fill it in; formatting
                # before the lookup meant the translated text was never found.
                update.effective_message.reply_text(
                    _(
                        "File size reduced by <b>{:.0%}</b>, "
                        "from <b>{}</b> to <b>{}</b>"
                    ).format(
                        reduction,
                        humanize.naturalsize(old_size),
                        humanize.naturalsize(new_size),
                    ),
                    parse_mode=ParseMode.HTML,
                )
                send_result_file(update, context, out_fn, "compress")

    # Clean up memory. PDF_INFO stores a (file_id, file_name) pair, so compare
    # its first element: comparing the whole pair with file_id never matched
    # and the entry was never removed.
    if PDF_INFO in user_data and user_data[PDF_INFO][0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def rename_pdf(update, context):
    """Send the user's stored PDF back under the file name given in the message.

    A trailing ``.pdf`` is removed from the message text and the remainder is
    rejected when it contains any character listed in ``invalid_chars``, in
    which case the user is asked for another name. Otherwise the PDF
    referenced by ``context.user_data[PDF_INFO]`` is downloaded as
    ``<text>.pdf`` into a temporary directory and handed to
    ``send_result_file``.

    Returns:
        The result of ``check_back_user_data`` when it is not None,
        ``WAIT_FILE_NAME`` when the name is rejected, otherwise
        ``ConversationHandler.END``.
    """
    result = check_back_user_data(update, context)
    if result is not None:
        return result

    _ = set_lang(update, context)
    message = update.effective_message
    text = re.sub(r"\.pdf$", "", message.text)
    invalid_chars = r"\/*?:\'<>|"

    if set(text) & set(invalid_chars):
        message.reply_text(
            _(
                "File names can't contain any of the following characters:\n{}\n"
                "Send me another file name"
            ).format(invalid_chars)
        )
        return WAIT_FILE_NAME

    new_fn = "{}.pdf".format(text)
    message.reply_text(
        _("Renaming your PDF file into <b>{}</b>").format(new_fn),
        parse_mode=ParseMode.HTML,
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    # Use a throwaway name other than "_" so the translation function bound
    # above is not overwritten.
    file_id, _stored_name = user_data[PDF_INFO]

    # Download straight to the new name inside the temporary directory. The
    # earlier version downloaded to an unmanaged NamedTemporaryFile and moved
    # it, which leaked the temp file whenever a step in between raised.
    with tempfile.TemporaryDirectory() as dir_name:
        out_fn = os.path.join(dir_name, new_fn)
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=out_fn)
        send_result_file(update, context, out_fn, "rename")

    # Clean up memory. PDF_INFO stores a (file_id, file_name) pair, so compare
    # its first element: comparing the whole pair with file_id never matched
    # and the entry was never removed.
    if PDF_INFO in user_data and user_data[PDF_INFO][0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def compress_pdf(update, context):
    """Compress the user's stored PDF with Ghostscript via ``run_cmd``.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file and rewritten by ``gs`` into ``Compressed_<name>.pdf``
    inside a temporary directory. When ``run_cmd`` returns a truthy value the
    size reduction is reported and the output path is handed to
    ``send_result_file``; otherwise the user is told to try again.

    Returns:
        ``ConversationHandler.END`` (also returned immediately when
        ``check_user_data`` reports a falsy result).
    """
    # Function-scope import so the block does not depend on the module header.
    import shlex

    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Compressing your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"Compressed_{os.path.splitext(file_name)[0]}.pdf"
            )

            # out_fn contains the user's file name; quote it properly rather
            # than wrapping it in bare double quotes, which a '"' in the name
            # would break.
            # NOTE(review): assumes run_cmd tokenises the string with
            # shell-like rules (shlex.split or a POSIX shell) - confirm.
            command = (
                "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 "
                "-dPDFSETTINGS=/default -dNOPAUSE -dQUIET -dBATCH "
                f"-sOutputFile={shlex.quote(out_fn)} {shlex.quote(tf.name)}"
            )

            if run_cmd(command):
                old_size = os.path.getsize(tf.name)
                new_size = os.path.getsize(out_fn)
                # Guard against an empty download to avoid ZeroDivisionError.
                reduction = 1 - new_size / old_size if old_size else 0.0

                # Translate the template first, then fill it in; formatting
                # before the lookup meant the translated text was never found.
                update.effective_message.reply_text(
                    _(
                        "File size reduced by <b>{:.0%}</b>, "
                        "from <b>{}</b> to <b>{}</b>"
                    ).format(
                        reduction,
                        humanize.naturalsize(old_size),
                        humanize.naturalsize(new_size),
                    ),
                    parse_mode=ParseMode.HTML,
                )
                send_result_file(update, context, out_fn, "compress")
            else:
                update.effective_message.reply_text(
                    _("Something went wrong, try again")
                )

    # Clean up memory. PDF_INFO stores a (file_id, file_name) pair, so compare
    # its first element: comparing the whole pair with file_id never matched
    # and the entry was never removed.
    if PDF_INFO in user_data and user_data[PDF_INFO][0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def text_to_pdf(update, context):
    """Turn the text of the incoming message into ``Text.pdf``.

    When the text equals the translated ``CANCEL`` label the call is delegated
    to ``cancel``. Otherwise the text is placed into ``BASE_HTML``, written as
    a PDF into a temporary directory, and the path is handed to
    ``send_result_file``.

    Returns:
        The result of ``cancel`` when cancelling, otherwise
        ``ConversationHandler.END``.
    """
    # Function-scope import so the block does not depend on the module header.
    from html import escape

    _ = set_lang(update, context)
    message = update.effective_message
    text = message.text

    if text == _(CANCEL):
        return cancel(update, context)

    message.reply_text(_("Creating your PDF file"), reply_markup=ReplyKeyboardRemove())

    # The message is plain text from the user: escape it before embedding it
    # in HTML so characters such as "<" and "&" appear literally in the PDF
    # and cannot be interpreted as markup. Line breaks are converted after
    # escaping so the inserted <br/> tags stay intact.
    body = escape(text).replace("\n", "<br/>")
    document = HTML(string=BASE_HTML.format(body))

    with tempfile.TemporaryDirectory() as dir_name:
        out_fn = os.path.join(dir_name, "Text.pdf")
        document.write_pdf(out_fn)
        send_result_file(update, context, out_fn, "text")

    return ConversationHandler.END
def process_photo(update: Update, context: CallbackContext,
                  file_ids: List[str], is_beautify: bool) -> None:
    """Download the given photos and turn them into a single PDF.

    Args:
        update: Incoming update; replies go to its ``effective_message``.
        context: Callback context providing ``bot``.
        file_ids: File ids of the photos, in the order they should appear.
        is_beautify: When true the photos go through
            ``noteshrink.notescan_main`` and the result is ``Beautified.pdf``;
            otherwise ``img2pdf.convert`` produces ``Converted.pdf``.
    """
    _ = set_lang(update, context)
    if is_beautify:
        update.effective_message.reply_text(
            _("Beautifying and converting your photos"),
            reply_markup=ReplyKeyboardRemove(),
        )
    else:
        update.effective_message.reply_text(
            _("Converting your photos into PDF"),
            reply_markup=ReplyKeyboardRemove(),
        )

    temp_files = []
    try:
        # Download every photo into its own temporary file.
        for file_id in file_ids:
            tf = tempfile.NamedTemporaryFile()
            temp_files.append(tf)
            photo_file = context.bot.get_file(file_id)
            photo_file.download(custom_path=tf.name)
        photo_files = [tf.name for tf in temp_files]

        with tempfile.TemporaryDirectory() as dir_name:
            if is_beautify:
                out_fn = os.path.join(dir_name, "Beautified.pdf")
                noteshrink.notescan_main(
                    photo_files, basename=f"{dir_name}/page", pdfname=out_fn
                )
                send_result_file(update, context, out_fn, "beautify")
            else:
                out_fn = os.path.join(dir_name, "Converted.pdf")
                with open(out_fn, "wb") as f:
                    f.write(img2pdf.convert(photo_files))
                send_result_file(update, context, out_fn, "to_pdf")
    finally:
        # Close (and thereby delete) the temporary files even when a download
        # or the conversion raises; previously they leaked on any error.
        for tf in temp_files:
            tf.close()
def get_pdf_preview(update, context):
    """Send a PNG image of the first page of the user's stored PDF.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is opened through
    ``open_pdf``. When that yields a truthy reader, its first page is written
    to a one-page PDF, converted to PNG with ``pdf2image``, saved as
    ``Preview_<name>.png`` and handed to ``send_result_file``.

    Returns:
        The result of ``check_back_user_data`` when it is not None, otherwise
        ``ConversationHandler.END``.
    """
    result = check_back_user_data(update, context)
    if result is not None:
        return result

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Extracting a preview for your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf1:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_reader = open_pdf(update, context, file_id, tf1.name)

        if pdf_reader:
            # Get first page of PDF file
            pdf_writer = PdfFileWriter()
            pdf_writer.addPage(pdf_reader.getPage(0))

            with tempfile.NamedTemporaryFile() as tf2:
                # Write cover preview PDF file
                with open(tf2.name, "wb") as f:
                    pdf_writer.write(f)

                with tempfile.TemporaryDirectory() as dir_name:
                    # Convert cover preview to PNG
                    out_fn = os.path.join(
                        dir_name, f"Preview_{os.path.splitext(file_name)[0]}.png"
                    )
                    imgs = pdf2image.convert_from_path(tf2.name, fmt="png")
                    imgs[0].save(out_fn)

                    # Send result file
                    send_result_file(update, context, out_fn, "preview")

    # Clean up memory. PDF_INFO stores a (file_id, file_name) pair, so compare
    # its first element: comparing the whole pair with file_id never matched
    # and the entry was never removed.
    if PDF_INFO in user_data and user_data[PDF_INFO][0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def crop_pdf(update, context, percent=None, offset=None):
    """Crop the margins of the user's stored PDF with ``pdf-crop-margins``.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file, cropped into a temporary directory, and the output path is
    handed to ``send_result_file``. On a non-zero exit code stdout/stderr are
    logged and the user is told to try again.

    Args:
        update: Incoming update; replies go to its ``effective_message``.
        context: Callback context providing ``bot`` and ``user_data``.
        percent: Value for the ``-p`` option; used whenever it is not None.
        offset: Value for the ``-a`` option; used only when ``percent`` is None.

    Returns:
        ``ConversationHandler.END``.
    """
    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Cropping your PDF file"), reply_markup=ReplyKeyboardRemove()
    )

    with tempfile.NamedTemporaryFile(suffix=".pdf") as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(dir_name, f"Cropped_{file_name}")

            if percent is not None:
                option, value = "-p", percent
            else:
                option, value = "-a", offset

            # Pass an argument list instead of splitting a formatted string:
            # out_fn contains the user's file name, and a name with spaces or
            # quotes used to be split into several arguments.
            cmd = ["pdf-crop-margins", option, str(value), "-o", out_fn, tf.name]
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False)
            out, err = proc.communicate()

            if proc.returncode != 0:
                log = Logger()
                log.error(
                    f'Stdout:\n{out.decode("utf-8")}\n\nStderr:\n{err.decode("utf-8")}'
                )
                update.effective_message.reply_text(
                    _("Something went wrong, try again")
                )
            else:
                send_result_file(update, context, out_fn, "crop")

    # Clean up memory. PDF_INFO stores a (file_id, file_name) pair, so compare
    # its first element: comparing the whole pair with file_id never matched
    # and the entry was never removed.
    if PDF_INFO in user_data and user_data[PDF_INFO][0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def text_to_pdf(update: Update, context: CallbackContext,
                font_family: str, font_url: str):
    """Turn the text stored in ``context.user_data[TEXT]`` into ``Text.pdf``.

    The text is wrapped in a ``<p>`` element and rendered to a PDF inside a
    temporary directory; the path is handed to ``send_result_file``. When
    ``font_family`` differs from ``DEFAULT_FONT`` a stylesheet declaring that
    font (loaded from ``font_url``) is applied to the paragraph.

    Args:
        update: Incoming update; replies go to its ``effective_message``.
        context: Callback context providing ``user_data``.
        font_family: Font family name to use for the text.
        font_url: URL the font is loaded from in the ``@font-face`` rule.

    Returns:
        ``ConversationHandler.END`` (also returned immediately when
        ``check_user_data`` reports a falsy result).
    """
    # Function-scope import so the block does not depend on the module header.
    from html import escape

    if not check_user_data(update, context, TEXT):
        return ConversationHandler.END

    _ = set_lang(update, context)
    text = context.user_data[TEXT]
    update.effective_message.reply_text(
        _("Creating your PDF file"), reply_markup=ReplyKeyboardRemove()
    )

    # The stored text is plain text from the user: escape it before embedding
    # it in HTML so characters such as "<" and "&" appear literally in the PDF
    # and cannot be interpreted as markup. Line breaks are converted after
    # escaping so the inserted <br/> tags stay intact.
    body = escape(text).replace("\n", "<br/>")
    document = HTML(string="<p>{}</p>".format(body))
    font_config = FontConfiguration()

    # None means "no extra stylesheet"; it is passed to write_pdf as-is when
    # the default font is requested.
    stylesheets = None
    if font_family != DEFAULT_FONT:
        font_css = (
            "@font-face {"
            f"font-family: {font_family};"
            f"src: url({font_url});"
            "}"
            "p {"
            f"font-family: {font_family};"
            "}"
        )
        stylesheets = [CSS(string=font_css, font_config=font_config)]

    with tempfile.TemporaryDirectory() as dir_name:
        out_fn = os.path.join(dir_name, "Text.pdf")
        document.write_pdf(out_fn, stylesheets=stylesheets, font_config=font_config)
        send_result_file(update, context, out_fn, "text")

    return ConversationHandler.END