async def hmm(event):
    """Convert a replied-to PDF file into a .docx document and send it back.

    Expects the command message to be a reply to a PDF; downloads the media,
    converts it with pdf2docx, uploads the result, then cleans up.
    """
    if not event.reply_to_msg_id:
        await event.reply("Reply to any Pdf File.")
        return
    # Send the status message exactly once (the original sent it twice,
    # overwriting `hmmu` and leaking an extra chat message).
    hmmu = await event.reply("hmm... Please Wait...🚶")
    lol = await event.get_reply_message()
    # Download the replied media into the configured temp directory.
    starky = await borg.download_media(lol.media, Config.TMP_DOWNLOAD_DIRECTORY)
    pdf_file = starky
    docx_file = './fridaybot/DOWNLOADS/FRIDAYOT.docx'
    parse(pdf_file, docx_file, start=0, end=None)
    await borg.send_file(
        event.chat_id,
        docx_file,
        # f-prefix dropped: the caption has no placeholders.
        caption="*PDF Converted Into Docx by Friday bot. Get your Friday From @FRIDAYOT."
    )
    # Clean up both the downloaded PDF and the generated docx.
    os.remove(pdf_file)
    os.remove(docx_file)
    await event.delete()
def pdf2docx_pdf_html(input_pdf, input_docx_location):
    """Convert a PDF to docx on disk, then render that docx as parsed HTML.

    Returns a BeautifulSoup tree of the mammoth-generated HTML.
    """
    parse(input_pdf, input_docx_location)
    # style_map keeps bold runs as <b> elements in the emitted HTML.
    html_text = mammoth.convert_to_html(input_docx_location, style_map="b => b").value
    return BeautifulSoup(html_text, 'html.parser')
def test_multi_pages(self):
    '''test converting pdf with multi-pages.

    Uses the sample name in both paths: the original left `filename` unused
    and its f-strings carried no placeholder.
    '''
    filename = 'demo'
    pdf_file = os.path.join(self.sample_dir, f'{filename}.pdf')
    docx_file = os.path.join(self.output_dir, f'{filename}.docx')
    # Convert pages 1..4 (pdf2docx's `end` is exclusive).
    parse(pdf_file, docx_file, start=1, end=5)
    # check file
    assert os.path.isfile(docx_file)
def pdftodocx():
    """Prompt for a PDF file and convert it to '<name>_converted.docx'."""
    pdf_path = filedialog.askopenfilename(title='Select a PDF File',
                                          filetypes=[('PDF File', '*.pdf')])
    if pdf_path == '':
        # Dialog was cancelled; nothing to convert.
        return
    pdf_docx['text'] = 'Converting...'
    from pdf2docx import parse
    parse(pdf_path, pdf_path.replace('.pdf', '_converted.docx'))
    messagebox.showinfo('Done!', 'PDF to Docx converted successfully.')
    # Restore the button label once conversion finishes.
    pdf_docx['text'] = 'PDF to Docx'
def pdf_to_docx(file, pages):
    """Convert a single page of *file* to docx; return the output path.

    *pages* is 1-based for callers; pdf2docx expects 0-based indices.
    Returns None when conversion fails (the original's best-effort behaviour
    is kept, but the bare ``except`` no longer swallows SystemExit and
    KeyboardInterrupt).
    """
    try:
        # NOTE(review): `converted_docx` is a module-level name defined
        # elsewhere in this file — confirm it is set before this is called.
        parse(file, converted_docx, pages=[pages - 1])
        return converted_docx
    except Exception:
        # Best-effort: signal failure with an implicit None.
        return None
def saveasdocx(self):
    """Convert the selected PDF to docx and optionally open the result."""
    global filename
    source_pdf = self.filename[0]
    target_docx = source_pdf.replace('.pdf', '_converted.docx')
    parse(source_pdf, target_docx)
    if self.allowOpen.checkState():
        # User ticked "open after conversion".
        os.startfile(target_docx)
    # Reset UI state: untick the checkbox and disable the convert button.
    self.allowOpen.setCheckState(False)
    self.convertPDF.setDisabled(True)
def pdf_to_docx_to_html(self, input_pdf, page_no):  # 3
    """Convert one PDF page to a cached docx and return it as parsed HTML.

    *page_no* is 1-based. Returns a BeautifulSoup tree; an empty tree when
    the page is out of range or the docx already exists.
    """
    docx_name = f'{self.temp_directory.name}/{page_no}.docx'
    # Close the pdfplumber handle deterministically — the original opened it
    # and never closed it (file handle leak).
    with pdfplumber.open(input_pdf) as pdf_obj:
        page_in_range = page_no - 1 in range(len(pdf_obj.pages))
    if not os.path.exists(docx_name) and page_in_range:
        parse(input_pdf, docx_name, start=page_no - 1, end=page_no)
        x = mammoth.convert_to_html(docx_name, style_map="b => b").value
        html = BeautifulSoup(x, 'html.parser')
    else:
        # NOTE(review): when the docx already exists this returns empty HTML
        # instead of re-reading the cached file — behaviour preserved as-is.
        x = ""
        html = BeautifulSoup(x, 'html.parser')
    return html
def to_docx(self, docx_file=None):
    """Convert this PDF to a docx file.

    When *docx_file* is omitted, the output path is the source file with its
    suffix replaced by ``.docx``.
    """
    check_install_package('pdf2docx')
    from pdf2docx import parse
    source = self.src_file
    target = docx_file if docx_file is not None else source.with_suffix('.docx')
    # pdf2docx reports progress through logging rather than printing.
    parse(str(source), str(target))
def convert_pdf2docx(input_file: str, pages: Tuple = None):
    """Convert *input_file* with pdf2docx, optionally limited to *pages*."""
    if pages:
        # Keep only numeric page tokens, coerced to int for pdf2docx.
        pages = [int(token) for token in list(pages) if token.isnumeric()]
    result = parse(pdf_file=input_file, pages=pages)
    print("###### Conversion Complete #######")
    return result
def edit_pdf_file(file_name):
    """Convert the uploaded PDF *file_name* to docx and render the edit page."""
    source_path = os.path.join(uploads_dir, file_name)
    # NOTE: the 'ouput' spelling is kept byte-for-byte — templates/URLs may
    # depend on this exact name.
    output_name = f'{file_name}-ouput.docx'
    parse(source_path, os.path.join(uploads_dir, output_name), start=0, end=None)
    return render_template(
        "file-edit.html",
        filename=str(file_name),
        file_path=source_path,
        output=output_name,
    )
def main(self):
    """Produce a .docx next to the source file.

    For a 'pdf' extension the pages' raw text is appended to a
    ``.docx``-named plain-text file via PyPDF2; otherwise pdf2docx performs
    a real conversion of the first page and its result is returned.
    """
    if self.fileext == "pdf":
        # Both handles are now closed deterministically — the original
        # leaked the read handle from open(self.filepath, "rb").
        with open(self.filepath, "rb") as pdf_handle:
            pdf = PyPDF2.PdfFileReader(pdf_handle)
            # "a" append mode is kept: repeated runs accumulate text.
            with open(self.filepath.replace(".pdf", ".docx"), "a") as file:
                for page in pdf.pages:
                    file.write(page.extractText())
    else:
        new_filename = self.filepath.replace(".pdf", ".docx")
        # start/end bound the conversion to the first page only.
        return parse(self.filepath, new_filename, start=0, end=1)
def ResumeAnalyzer(applicant_resume_pdf, job_description_pdf):
    """Return the % cosine similarity between a resume and a job description.

    Both PDFs are converted to docx at the configured paths, their text is
    extracted, and a bag-of-words cosine similarity is computed.
    """
    parse(applicant_resume_pdf, resume_docx_path, start=0, end=None)
    parse(job_description_pdf, job_desc_docx_path, start=0, end=None)
    corpus = [docx2txt.process(resume_docx_path),
              docx2txt.process(job_desc_docx_path)]
    vectorizer = CountVectorizer()
    counts = vectorizer.fit_transform(corpus)
    # Off-diagonal entry [0][1] is resume-vs-description similarity.
    similarity = cosine_similarity(counts)[0][1] * 100
    return round(similarity, 2)
def pdf_word():
    """Flask endpoint: convert an uploaded PDF to docx and return its URL.

    The upload's base name is md5-hashed so arbitrary user names map to a
    safe on-disk file name.
    """
    upload_file = request.files['pdf-word']
    if upload_file.filename != '':
        docx_extension = ".docx"
        filename, file_extension = os.path.splitext(upload_file.filename)
        md5_obj = hashlib.md5()
        md5_obj.update(filename.encode('utf-8'))
        filename_hash = md5_obj.hexdigest()
        full_filename = secure_filename(filename_hash) + file_extension
        docx_filename = secure_filename(filename_hash) + docx_extension
        upload_file.save(os.path.join(DIR, full_filename))
        # Build the output path with os.path.join like every other path here.
        # The original concatenated DIR + name directly, which writes to the
        # wrong location whenever DIR lacks a trailing separator — and then
        # url_for points at a file that was never created.
        docx_path = os.path.join(DIR, docx_filename)
        parse(os.path.join(DIR, full_filename), docx_path, start=0)
        return jsonify({
            'error': False,
            'file': url_for('uploaded_file', filename=docx_filename)
        })
    else:
        return jsonify({'error': True, 'message': 'Please provide the file'})
async def hmm(event):
    """Turn a replied-to PDF into a docx and post it back to the chat."""
    if not event.reply_to_msg_id:
        await event.edit("Reply to any Pdf File.")
        return
    await event.edit("hmm... Please Wait...🚶")
    replied = await event.get_reply_message()
    # Pull the PDF into the configured temp directory.
    downloaded = await borg.download_media(replied.media,
                                           Config.TMP_DOWNLOAD_DIRECTORY)
    await event.edit("hmm... Please Wait..")
    docx_file = "./virtualuserbot/DOWNLOADS/Infinity_Bots.docx"
    parse(downloaded, docx_file, start=0, end=None)
    await borg.send_file(
        event.chat_id,
        docx_file,
        caption=
        f"*PDF Converted Into Docx by VirtualUserbot. Credits to @FRIDAYOT.",
    )
    # Remove both the downloaded PDF and the generated docx, then the command.
    os.remove(downloaded)
    os.remove(docx_file)
    await event.delete()
def convert(request, pk):
    """Django view: convert a stored PDF to docx, extract its text, and
    translate it to Hindi, logging each stage to stdout.
    """
    document = get_object_or_404(Document, pk=pk)
    print(document.document)
    translator = Translator()
    # Build the static-file paths once instead of re-concatenating the same
    # literal four times; [:-4] strips the '.pdf' suffix as before.
    pdf_path = "D:\\audio\\audio\\static\\" + str(document.document)
    docx_path = pdf_path[:-4] + ".docx"
    parse(pdf_path, docx_path, start=0)
    print("PDF --> DOCX")
    # extract text
    text = docxpy.process(docx_path)
    print("DOCX --> TXT")
    print(text)
    print("Translating...")
    # (The original constructed a second Translator here; one instance suffices.)
    translation = translator.translate(text, dest="hi")
    print("English --> Hindi")
    print(translation.text)
    context = {'document': document}
    return render(request, 'audio_detail.html', context)
def pdf_convert_doc(inputf: str, outputf: str, page: Tuple = None):
    """Convert a PDF to docx with pdf2docx and print a short summary.

    The original body referenced names that do not exist (``pages``,
    ``input``, ``output``, ``summery``) and a wrong keyword ``doc_pth`` —
    every call raised NameError/TypeError. It now consistently uses its own
    parameters and pdf2docx's actual ``docx_file`` keyword.
    """
    pages = None
    if page:
        # Keep only numeric tokens, coerced to int for pdf2docx.
        pages = [int(x) for x in list(page) if x.isnumeric()]
    result = parse(pdf_file=inputf, docx_file=outputf, pages=pages)
    summary = {'file': inputf, 'doc': outputf, 'pages': str(pages)}
    # Printing Summary
    print(
        "## Summary ########################################################")
    print("\n".join("{}:{}".format(x, y) for x, y in summary.items()))
    print(
        "###################################################################")
    return result
def convert_pdf2docx(input_file: str, output_file: str, pages: Tuple = None):
    """Converts pdf to docx and prints a summary of the run."""
    if pages:
        pages = [int(i) for i in list(pages) if i.isnumeric()]
    # pdf2docx.parse's output keyword is ``docx_file`` — the original's
    # ``docx_with_path=`` raised TypeError: unexpected keyword argument.
    result = parse(pdf_file=input_file, docx_file=output_file, pages=pages)
    summary = {
        "File": input_file,
        "Pages": str(pages),
        "Output File": output_file
    }
    # Printing Summary
    print("## Summary ########################################################")
    print("\n".join("{}:{}".format(i, j) for i, j in summary.items()))
    print("###################################################################")
    return result
async def starky(event):
    # Bulk tool: download every PDF from a channel (argument or current chat),
    # convert each to docx, post the results back, then delete the work dirs.
    un = event.pattern_match.group(1)
    # Two throw-away working directories: one for downloads, one for docx output.
    rndm = uuid.uuid4().hex
    frid = uuid.uuid4().hex
    diro = f"./{rndm}/"
    dirb = f"./{frid}/"
    os.makedirs(diro)
    os.makedirs(dirb)
    media_count = 0
    text_count = 0
    # Target channel: explicit argument wins, else the current chat.
    if un:
        chnnl = un
    else:
        chnnl = event.chat_id
    await event.edit(f"**Fetching All Files From This Channel**")
    try:
        # Fetch at most the latest 3000 messages.
        chnnl_msgs = await borg.get_messages(chnnl, limit=3000)
    except:
        await event.edit(
            "**Unable To fetch Messages !** \n`Please, Check Channel Details And IF THere Are Any Media :/`"
        )
        return
    total = int(chnnl_msgs.total)
    await event.edit(f"**Downloading {total} Media/Messages**")
    # Download every media attachment; count plain-text messages separately.
    for d in chnnl_msgs:
        if d.media:
            media_count += 1
            await borg.download_media(d.media, diro)
        if d.text:
            text_count += 1
    await event.edit(
        f"**Total Media :** `{total}` \n**Downloaded Media :** `{media_count}` \n**Total Texts :** `{text_count}` \n**Now Converting Files.**"
    )
    # Convert each downloaded PDF to a docx with a random 9-char name.
    Azx = glob.glob(f"{diro}*.pdf")
    for friday in Azx:
        N = 9
        res = ''.join(
            random.choices(string.ascii_uppercase + string.digits, k=N))
        pdf_file = friday
        docx_file = f'{dirb}{str(res)}.docx'
        parse(pdf_file, docx_file, start=0, end=None)
    # Send every converted docx back to the invoking chat.
    Ax = glob.glob(f"{dirb}*.docx")
    for pop in Ax:
        await borg.send_file(
            event.chat_id,
            pop,
            caption=
            f"**Total Media :** `{total}` \n**Downloaded Media :** `{media_count}` \n**Total Texts :** `{text_count}` \n**By @fridayot**"
        )
    # Clean up: remove every file in both work dirs, then the dirs themselves.
    Azx = glob.glob(f"{diro}*")
    Azpx = glob.glob(f"{dirb}*")
    for x in Azx:
        os.remove(x)
    for pop in Azpx:
        os.remove(pop)
    os.rmdir(diro)
    os.rmdir(dirb)
def pdf_to_word(pdf_file):
    """Convert *pdf_file* to a docx named 'word.docx' in the CWD."""
    parse(pdf_file, 'word.docx')
def convert_pdf_to_word(self, document_path):
    """Convert the PDF at *document_path* to a docx under MEDIA_ROOT.

    Returns the path of the converted document.
    """
    name, extension = self.get_document_name_and_extension(document_path)
    target_path = os.path.join(MEDIA_ROOT, name + '.' + 'docx')
    parse(document_path, target_path)
    return target_path
'results-{}'.format(str(datetime.now()).replace(':', '-')[:19])) try: os.mkdir(curr_result_dir) except OSError: print(curr_result_dir + " exists in the system") pdf_path = curr_result_dir + r'\ocr.pdf' pdf = canvas.Canvas(pdf_path, bottomup=0, pagesize=(img.shape[1], img.shape[0])) pdf.setTitle('OCR Results' + str(datetime.now())[:10]) px, py, ph = None, None, None for label, (x, y, w, h) in zip(labels, boxs): if px is None and py is None: px, py = x, y pw, ph = w, h if py + ph > y: pdf.setFont('Times-Bold', ph) pdf.drawString(x, py + ph, label) else: pdf.setFont('Times-Bold', h) pdf.drawString(x, y + h, label) px, py, ph = x, y, h pdf.save() parse(pdf_path, curr_result_dir + r'\ocr.docx', start=0, end=None) print('Results path {}'.format(result_dir)) cv2.imwrite(curr_result_dir + r'\overview.png', overview)
def pdf_to_docx(pdf_file, page, docx_location):
    """Convert one page of *pdf_file* into *docx_location* and return it.

    *page* is 1-based; pdf2docx expects 0-based page indices.
    """
    zero_based_pages = [page - 1]
    parse(pdf_file, docx_location, pages=zero_based_pages)
    return docx_location
def index(request):
    # Main ingestion view: handles manuscript upload + metadata auto-population
    # ("UPLOAD & POPULATE") and the final push to cloud storage ("SUBMIT").
    if not request.user.is_authenticated:
        print("Trying to open without Signning In")
        request.session["bar"] = "Please Login In and Try Again"
        return redirect('/login')
    global unique_id
    logs = []
    if (request.method == 'POST'):
        if request.POST.get("button") == "UPLOAD & POPULATE":
            #deleteUpNDownloads(unique_id)
            print("Username : {} | Started to Process".format(request.user.username))
            unique_id = "81948"  # str(random.randint(10000,99999))#"unique_id"
            # if database.child("ingested data").get().val():
            # Re-roll the reference id until it is unused in the database.
            while unique_id in database.child("ingested data").get().val().keys():
                unique_id = str(random.randint(10000,99999))#"unique_id"
            print("Reference ID : ", unique_id)
            ppValues = {}
            try:
                # Create the per-submission folder tree (manuscript/images/others);
                # any OS error is deliberately ignored (best effort).
                if not os.path.exists(os.path.join(UPLOAD_LOCATION, unique_id)):
                    os.mkdir(os.path.join(UPLOAD_LOCATION, unique_id))
                if not os.path.exists(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript")):
                    os.mkdir(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript"))
                if not os.path.exists(os.path.join(UPLOAD_LOCATION, unique_id, "images")):
                    os.mkdir(os.path.join(UPLOAD_LOCATION, unique_id, "images"))
                if not os.path.exists(os.path.join(UPLOAD_LOCATION, unique_id, "others")):
                    os.mkdir(os.path.join(UPLOAD_LOCATION, unique_id, "others"))
            except:
                pass
            logs.append("Auto Populate Done")
            print("Uploading Files and Parse")
            for file in request.FILES.getlist("files"):
                if file.name.split(".")[-1] in ALLOWED_MS_FORMATS :
                    word_count, text = 0, ""
                    logs.append("File : {} - Successfully Uploaded".format(str(file.name)))
                    # Stream the uploaded manuscript to disk chunk by chunk.
                    with open(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript", file.name), 'wb+') as destination:
                        for i, chunk in enumerate(file.chunks()):
                            destination.write(chunk)
                        destination.close()
                    if file.name.split(".")[-1] == "docx":
                        # docx input: also produce a PDF copy; count words on the docx.
                        convert(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript", file.name), os.path.join(UPLOAD_LOCATION, unique_id, "manuscript", file.name.split(".")[0] + ".pdf"))
                        word_count, text = word_counter(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript", file.name))
                    else:
                        # pdf input: convert to a temporary docx just for word counting.
                        pdf2docx.parse(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript", file.name), os.path.join(UPLOAD_LOCATION, unique_id, "manuscript", file.name.split(".")[0] + ".docx"))
                        word_count, text = word_counter(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript", file.name.split(".")[0] + ".docx"))
                        os.unlink(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript", file.name.split(".")[0] + ".docx"))
                    # Run the text-analysis loggers on the extracted text.
                    createRnPlogs(text, unique_id)
                    gDF = createSpellLogs(text, unique_id)
                    createIzYzLogs(gDF, unique_id)
                    createGrammarLogs(text, unique_id)
                    createCommaLogs(text, unique_id)
                    # client.process(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript"), os.path.join(UPLOAD_LOCATION, unique_id, "manuscript"), 10, "processHeaderDocument", False, 1, 0, True, False)
                    coI, fund = 0, 0
                    # Scan TEI section headings for conflict-of-interest / funding flags.
                    # NOTE(review): the path uses the literal string "unique_id" as the
                    # folder, not the unique_id variable — looks like a bug; confirm.
                    headingList = minidom.parse(os.path.join(UPLOAD_LOCATION, "unique_id", "manuscript", file.name.split(".")[0] + ".tei.xml")).getElementsByTagName('head')
                    for head in headingList:
                        if head.firstChild:
                            if "conflict of interest" in head.firstChild.data.lower():
                                coI = 1
                            if "funding" in head.firstChild.data.lower():
                                fund = 1
                    # Same literal "unique_id" path here — see note above.
                    with open(os.path.join(UPLOAD_LOCATION, "unique_id", "manuscript", file.name.split(".")[0] + ".tei.xml"), 'rb') as tei:
                        soup = bs4.BeautifulSoup(tei, 'lxml')
                        # Each metadata field is best-effort: fall back to ""/[] on failure.
                        try:
                            a_title = soup.title.getText()
                        except:
                            a_title = ""
                        try:
                            a_type = soup.title.get_attribute_list("type")[0]
                        except:
                            a_type = ""
                        try:
                            date = soup.date.getText()
                        except:
                            date = ""
                        try:
                            authors = ([a.persname.getText(" ") for a in soup.analytic.findAll("author")] if soup.analytic.parent.parent.name=="sourcedesc" else [])
                        except:
                            authors = []
                        try:
                            abstract = soup.abstract.getText(separator=' ', strip=True)
                        except:
                            abstract = ""
                        try:
                            n_tables = len(list(dict.fromkeys(soup.findAll("table"))))
                        except:
                            n_tables = ""
                        try:
                            n_figures = sum([1 for f in soup.findAll("figure") if ("fig" in f.get_attribute_list("xml:id")[0] and type(list(f.children)[0])==bs4.element.NavigableString)])
                        except:
                            n_figures = ""
                        try:
                            doi = soup.find('idno', type='DOI').getText()
                        except:
                            doi = ""
                    mail = request.user.email
                    c_interest = coI
                    funding = fund
                    # Values rendered back into the form for the user to review.
                    ppValues = {"Content_Text": text, "Ref_ID": unique_id, "Mail_ID": mail, "Article_Title": a_title, "Article_Type": a_type, "Published_Date": date, "Authors": authors, "No_of_Figures": n_figures, "No_of_Tables": n_tables, "Abstract": abstract, "Special_Instructions": "none", "DOI": doi, "Conflict_of_Interest": c_interest, "Funding": fund, "Word_Count": word_count}
                    if file.name.split(".")[-1] == "docx":
                        # Drop the intermediate PDF created for docx inputs.
                        os.unlink(os.path.join(UPLOAD_LOCATION, unique_id, "manuscript", file.name.split(".")[0] + ".pdf"))
                else:
                    logs.append("File : {} - Upload Error : Not Supported".format(str(file.name)))
            # Save uploaded images (allowed formats only).
            for file in request.FILES.getlist("images"):
                if file.name.split(".")[-1] in ALLOWED_IM_FORMATS:
                    logs.append("Image : {} - Successfully Uploaded".format(str(file.name)))
                    with open(os.path.join(UPLOAD_LOCATION, unique_id, "images", file.name), 'wb+') as destination:
                        for i, chunk in enumerate(file.chunks()):
                            destination.write(chunk)
                else:
                    logs.append("Image : {} - Upload Error : Not Supported".format(str(file.name)))
            # Save any other attachments without format checking.
            for file in request.FILES.getlist("others"):
                logs.append("Other : {} - Successfully Uploaded".format(str(file.name)))
                with open(os.path.join(UPLOAD_LOCATION, unique_id, "others", file.name), 'wb+') as destination:
                    for i, chunk in enumerate(file.chunks()):
                        destination.write(chunk)
            context = {'logs':"\n".join(logs)}
            context.update(ppValues)
            return render(request, '../templates/ingPage.html', context)
        if request.POST.get("button") == "SUBMIT":
            # Collect the (possibly user-edited) metadata from the form.
            mail = request.POST['mail_id']
            doi = request.POST['doi']
            aTitle = request.POST['article_title']
            aType = request.POST['article_type']
            abstract = request.POST['abstract']
            date = request.POST['published_date']
            author = request.POST.getlist("authors")
            figures = request.POST['no_of_figures']
            tables = request.POST['no_of_tables']
            instruct = request.POST['spl_instruct']
            cInterest = request.POST['c_Interest']
            funding = request.POST['funding']
            refID = request.POST['unique_id']
            wCount = request.POST['word_count']
            message = request.POST['message']
            # text = request.POST['content_text']
            username = request.user.username
            dbValues = {"username": username, "mail id": mail, "message": message, "doi":doi, "word count":wCount,"article title": aTitle, "article type": aType, "published date": date, "authors": author, "no of figures": figures, "no of tables": tables, "abstract": abstract, "special instructions":instruct, "conflict of interest":cInterest, "funding information":funding}
            print("Values to be saved in Database : ", dbValues)
            # Reject the submission when the declared figure count does not
            # match the number of uploaded images.
            if not len(os.listdir(os.path.join(UPLOAD_LOCATION, refID, "images"))) == int(figures):
                ppValues = {"Ref_ID": refID, "Mail_ID": mail, "Article_Title": aTitle, "Article_Type": aType, "Published_Date": date, "Authors": author, "No_of_Figures": "", "No_of_Tables": tables, "Abstract": abstract, "Special_Instructions": instruct, "DOI": doi, "Conflict_of_Interest": cInterest, "Funding": funding, "Word_Count": wCount}
                context = {'logs':"Submission Error : Image count", "alert":"#ff8888"}
                context.update(ppValues)
                return render(request, '../templates/ingPage.html', context)
                # return render(request, '../templates/ingPage.html', {"logs":"Submission Error : Image count", "alert":"#ff8888"})
            # Push every uploaded file to cloud storage, then delete the local copy.
            for folder in os.listdir(os.path.join(UPLOAD_LOCATION, refID)):
                for file in os.listdir(os.path.join(UPLOAD_LOCATION, refID, folder)):
                    path_on_cloud = CLOUD_LOCATION
                    path_on_local = os.path.join(UPLOAD_LOCATION, refID, folder, file)
                    storage.child(path_on_cloud).child(refID).child(folder).child(file).put(path_on_local)
                    os.unlink(path_on_local)
            database.child(CLOUD_LOCATION).child(refID).set(dbValues)
            # Send a confirmation mail if the mail service comes up.
            if mailService.start():
                logs.append("Confirmation mail has been sent to " + str(mail))
                mailService.send(mail, "Confirmation mail for your Submission", "Amnet Systems\nReference ID\t:\t{} \nYour request has been submitted successfully".format(refID))
            logs.append("Database : Updated Successfully")
            context = {'logs':"\n".join(logs), "alert":"#99ff99"}
            return render(request, '../templates/ingPage.html', context)
    # GET (or unrecognised POST): render the empty ingestion page.
    context = {'logs': "", "alert":"#D6EAF8", "Conflict_of_Interest": 0, "Funding": 0}
    return render(request, '../templates/ingPage.html', context)
def ultimate_converter(give_path, get_folder, get_format, type=False, cut_text=False, crop_gif=['00:00:00.00', '00:00:01.00', 1], tags=False):
    # Universal file converter: routes give_path -> get_folder/<name>.<get_format>
    # through type-specific branches (text / image / audio / video) and writes a
    # JSON sidecar describing the produced file(s).
    # NOTE(review): `type` shadows the builtin, and crop_gif is a mutable
    # default (read-only here, so benign) — worth fixing upstream.
    ask_continue = True
    # check paths: retry each path relative to BASE_DIR before giving up.
    if not os.path.exists(get_folder) and ask_continue:
        get_folder = BASE_DIR + '/' + get_folder
        print(get_folder)
        if not os.path.exists(get_folder):
            print('path end are not exist')
            ask_continue = False
    if not os.path.exists(give_path) and ask_continue:
        give_path = BASE_DIR + '/' + give_path
        print(give_path)
        if not os.path.exists(give_path):
            print('path start are not exist')
            ask_continue = False
    if ask_continue:
        # give format: last word-run in the path is taken as the extension.
        pattern = re.compile(r'\w+')
        give_format = pattern.findall(give_path)[-1]
        print(give_format)
        if get_format != give_format:
            print('corect format')
            # get name: microsecond timestamp + one random digit.
            ticks = time.time()
            get_name = str(round(ticks * 1000000)) + str(random.randint(0, 9))
            # generate path
            get_path = get_folder + get_name + '.' + get_format
            print(get_path)
            # get object: the format/type lookup table.
            type_arr = get_obj()
            if not type:
                # available type: collect every category that accepts the input format.
                check_arr = []
                if give_format in type_arr['text']['get']:
                    check_arr.append('text')
                if give_format in type_arr['image']['get']:
                    check_arr.append('image')
                if give_format in type_arr['audio']['get']:
                    check_arr.append('audio')
                if give_format in type_arr['video']['get']:
                    check_arr.append('video')
                if give_format in type_arr['object']['get']:
                    check_arr.append('object')
                if give_format in type_arr['subtitles']['get']:
                    check_arr.append('subtitles')
                if give_format in type_arr['code']['get']:
                    check_arr.append('code')
                if give_format in type_arr['font']['get']:
                    check_arr.append('font')
                if give_format in type_arr['archive']['get']:
                    check_arr.append('archive')
                print(check_arr)
                # select type: exit if none match; ask interactively when ambiguous.
                if len(check_arr) == 0:
                    sys.exit()
                elif len(check_arr) > 1:
                    i = False
                    while i == False:
                        try:
                            select_type = input(
                                'select type format like index:')
                            select_type = int(select_type)
                            # IndexError here re-prompts via the except branch.
                            check_arr[select_type]
                            i = True
                        except:
                            print('wrong type')
                else:
                    select_type = 0
                    print('only one type')
                type = check_arr[select_type]
            # check is format in array
            #print([get_format,type_arr[type]['get']])
            #print([give_format,type_arr[type]['give']])
            if get_format in type_arr[type]['get'] and give_format in type_arr[
                    type]['give']:
                print('corect format')
                json_settings = []
                # Type TEXT
                if type == 'text':
                    import pypandoc
                    print('type text')
                    if give_format == 'pdf':
                        print('give_file is pdf file')
                        # convert pdf to docx
                        from pdf2docx import parse
                        # get new path
                        get_doc_path = get_folder + get_name + '.docx'
                        parse(give_path, get_doc_path, start=0, end=None)
                        # convert docx to format
                        if get_format != 'docx':
                            output = pypandoc.convert_file(get_doc_path,
                                                           get_format,
                                                           outputfile=get_path)
                            # delete old file
                            os.remove(get_doc_path)
                    else:
                        output = pypandoc.convert_file(give_path,
                                                       get_format,
                                                       outputfile=get_path)
                # Type IMAGE
                elif type == 'image':
                    print('type image')
                    from PIL import Image
                    arr = get_obj('doctoimg') + get_obj('img_layers')
                    arr2 = get_obj('vid')
                    if give_format in arr:
                        # Multi-page/layered input: rasterize every page to PNG
                        # in its own folder, then re-encode if needed.
                        print('give_file is pdf file')
                        from wand.image import Image as wi
                        # create new path
                        path = get_folder + get_name
                        os.mkdir(path)
                        pdf = wi(filename=give_path, resolution=300)
                        pdfimage = pdf.convert("png")
                        i = 1
                        # create images
                        for img in pdfimage.sequence:
                            page = wi(image=img)
                            img_path = path + '/' + str(i) + ".png"
                            page.save(filename=img_path)
                            # create info
                            file_info = {
                                'filename': str(i),
                                'format': get_format,
                                'path': get_folder + get_name + '/'
                            }
                            # cut text: OCR the image and attach the text to the info.
                            if cut_text:
                                text = get_text_image(get_path, 600, 120)
                                print(text)
                                file_info['text'] = []
                                file_info['text'].append({
                                    'x': 0,
                                    'y': 0,
                                    'width': 0,
                                    'height': 0,
                                    'text': text
                                })
                            # add tags
                            if tags:
                                file_info['tags'] = tags
                            # add to json
                            json_settings.append(file_info)
                            # change format: re-encode the PNG when another format was asked.
                            if get_format != 'png':
                                image = Image.open(img_path)
                                arr = get_obj('img_jpg')
                                if get_format in arr:
                                    image = image.convert('RGB')
                                cur_path = path + '/' + str(
                                    i) + '.' + get_format
                                image.save(cur_path)
                                # delete old file
                                os.remove(img_path)
                            i += 1
                    elif give_format in arr2 and get_format == 'gif':
                        print('crop gif')
                        # 1,22.65 = 1 min 22 sec 65 milisec
                        movie_start = crop_gif[0]
                        movie_end = crop_gif[1]
                        try:
                            size = crop_gif[2]
                        except:
                            size = 1
                        from moviepy.editor import VideoFileClip
                        clip = (VideoFileClip(give_path).subclip(
                            (movie_start), (movie_end)).resize(size))
                        clip.write_gif(get_path)
                    else:
                        # Plain single image conversion.
                        image = Image.open(give_path)
                        arr = get_obj('img_jpg')
                        if get_format in arr:
                            image = image.convert('RGB')
                        image.save(get_path)
                        # create info
                        file_info = {
                            'filename': get_name,
                            'format': get_format,
                            'path': get_folder
                        }
                        # cut text
                        if cut_text:
                            text = get_text_image(get_path, 600, 120)
                            print(text)
                            # add text
                            file_info['text'] = []
                            file_info['text'].append({
                                'x': 0,
                                'y': 0,
                                'width': 0,
                                'height': 0,
                                'text': text
                            })
                        # add to json
                        if tags:
                            file_info['tags'] = tags
                        json_settings.append(file_info)
                # Type AUDIO
                elif type == 'audio':
                    import ffmpy
                    print('type audio')
                    if get_format == 'mp3':
                        from moviepy.editor import VideoFileClip
                        # create new path
                        get_mp3_path = get_folder + get_name + '.mp3'
                        # Create file: extract the audio track to mp3 first.
                        videoclip = VideoFileClip(give_path)
                        audioclip = videoclip.audio
                        audioclip.write_audiofile(get_mp3_path)
                        audioclip.close()
                        videoclip.close()
                        # change format
                        if get_format != 'mp3':
                            ff = ffmpy.FFmpeg(inputs={get_mp3_path: None},
                                              outputs={get_path: None})
                            ff.run()
                            # delete old file
                            os.remove(get_mp3_path)
                    else:
                        ff = ffmpy.FFmpeg(inputs={give_path: None},
                                          outputs={get_path: None})
                        ff.run()
                # Type VIDEO
                elif type == 'video':
                    print('type video')
                    if get_format == 'gif':
                        # 1,22.65 = 1 min 22 sec 65 milisec
                        movie_start = crop_gif[0]
                        movie_end = crop_gif[1]
                        from moviepy.editor import VideoFileClip
                        clip = (VideoFileClip(give_path).subclip(
                            (movie_start), (movie_end)).resize(1))
                        clip.write_gif(get_path)
                    else:
                        import ffmpy
                        ff = ffmpy.FFmpeg(inputs={give_path: None},
                                          outputs={get_path: None})
                        ff.run()
                # create standart info: fallback sidecar entry when no branch added one.
                if len(json_settings) == 0:
                    # create info
                    file_info = {
                        'filename': get_name,
                        'format': get_format,
                        'path': get_folder,
                        'page': 0
                    }
                    if tags:
                        file_info['tags'] = tags
                    json_settings.append(file_info)
                # Write JSON
                json_file = open(get_folder + get_name + '.json', 'w+')
                json_file.write(json.dumps(json_settings))
                json_file.close()
            else:
                print('wrong format')
            return get_path
        else:
            print('format should not be the same')
    else:
        print('ask_continue False')
def pdf_to_docx(pdf_file, pages, docx_file):
    """Convert a single 1-based page of *pdf_file* into *docx_file*.

    Returns the docx path for chaining.
    """
    # pdf2docx wants 0-based page numbers.
    page_index = pages - 1
    parse(pdf_file, docx_file, pages=[page_index])
    return docx_file
def pdf2docx(self):
    """Convert the current data file to docx and repoint state at the result."""
    target = self.data_file_path + '.docx'
    pdf2docx.parse(self.data_file_path, target)
    # Subsequent processing should operate on the converted document.
    self.data_file_path = target
    self.filetype = 'docx'
    return None
def PdfConverted():
    """Convert the currently selected PDF to a fixed docx path, then clear
    the selection so the same file is not converted twice."""
    parse(root.filename, "C:\\Users\\Shravan Sheri\\Documents\\file.docx")
    root.filename = ""
def pdf_to_docx(file, pages, input_docx_location):
    """Write the 1-based page *pages* of *file* to *input_docx_location*.

    Returns the output location.
    """
    # pdf2docx takes 0-based page indices.
    parse(file, input_docx_location, pages=[pages - 1])
    return input_docx_location
def pdfToDocx(file_path, to_path):
    """Convert every page of the PDF at *file_path* into a docx at *to_path*."""
    doc = fitz.open(file_path)
    try:
        count = doc.pageCount
    finally:
        # Release the PyMuPDF handle — the original left it open.
        doc.close()
    # pdf2docx treats `end` as exclusive: end=count includes the final page
    # (the original's `count - 1` silently dropped the last page).
    parse(file_path, to_path, start=0, end=count)
def pdf_to_word(pdf_file):
    """Convert *pdf_file* to a docx file named 'word.docx' in the CWD."""
    # convert to docx
    word_file = 'word.docx'
    parse(pdf_file, word_file)