def Pptx_To_Png(pptx_input):
    """Convert a PowerPoint file to PNG and return the URL of the first page.

    The conversion is delegated to the Cloudmersive Convert API through
    ``ConvertDocumentApi.convert_document_pptx_to_png``.

    Args:
        pptx_input: Input file to perform the operation on; passed to the
            API client unchanged.

    Returns:
        The ``url`` attribute of the first entry in the response's
        ``png_result_pages``, or ``None`` when the API call raises
        ``ApiException`` or the response carries no pages.
    """
    # Configure API key authorization: Apikey
    configuration = cloudmersive_convert_api_client.Configuration()
    # NOTE(review): hard-coded credential; it should be loaded from
    # configuration or the environment rather than committed to source.
    configuration.api_key['Apikey'] = 'f0a69dc5-3973-4ab9-9ce8-6a28eda5f67a'

    api_client = cloudmersive_convert_api_client.ApiClient(configuration)
    api_instance = cloudmersive_convert_api_client.ConvertDocumentApi(
        api_client)

    try:
        # Convert PowerPoint PPTX to PNG image array
        api_response = api_instance.convert_document_pptx_to_png(pptx_input)
    except ApiException as e:
        print(
            "Exception when calling ConvertDocumentApi->convert_document_pptx_to_png: %s\n"
            % e)
        return None

    # A response without pages (None or an empty list) would otherwise
    # raise on the [0] lookup; report it the same way as an API failure.
    pages = api_response.png_result_pages
    if not pages:
        return None
    return pages[0].url
Пример #2
0
def uploaded_file(filename, s, e):
    """OCR a page range of a PDF and turn the text into downloadable notes.

    Steps, in order:
      1. Render pages ``s``..``e`` of ``static/<filename>`` to PNG files
         (``outfile<index>.png`` in the working directory) and OCR them
         with Tesseract.
      2. Extract keywords from the OCR text, keep the ten longest words,
         lemmatize them and look up a definition for each.
      3. Summarize the text and write summary, vocabulary and synonyms
         into a DOCX document.
      4. Convert the DOCX to PDF through the Cloudmersive API, store it
         under ``static/``, record a ``Note`` row and render
         ``notes.html``.

    Args:
        filename: Name of the PDF inside the ``static`` directory.
        s: First page to process, counted from 1 (converted with ``int``).
        e: Last page to process, inclusive (converted with ``int``).

    Returns:
        The result of ``render_template('notes.html', myFile=...)`` where
        ``myFile`` is the static URL of the generated PDF.
    """
    import fitz
    import pytesseract
    from PyDictionary import PyDictionary
    from summa import keywords
    from summa.summarizer import summarize
    from nltk.stem import WordNetLemmatizer
    from nltk.tokenize import sent_tokenize
    from docx import Document
    from datamuse import datamuse
    import cloudmersive_convert_api_client
    from cloudmersive_convert_api_client.rest import ApiException

    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"

    # --- 1. Render the requested pages and OCR them -----------------------
    # NOTE(review): `filename` is joined into a path as-is; confirm the
    # caller sanitizes it if it can come from user input.
    pdf = fitz.open('static' + '/' + filename)
    page_texts = []
    try:
        for page_index in range(int(s) - 1, int(e)):
            page = pdf.loadPage(page_index)
            image_path = "outfile" + str(page_index) + ".png"
            page.getPixmap().writePNG(image_path)
            page_texts.append(pytesseract.image_to_string(image_path))
    finally:
        # Release the PDF handle even if rendering or OCR fails.
        pdf.close()
    text = ''.join(page_texts)
    title = str(filename.replace('.pdf', ''))

    # --- 2. Pick vocabulary words ----------------------------------------
    words = []
    for line in keywords.keywords(text).split('\n'):
        for word in line.split(' '):
            if word:  # an empty keyword result would yield '' entries
                words.append(word)
    # Longest words first; sort + reverse keeps the original tie order.
    words.sort(key=len)
    words.reverse()
    lemmatizer = WordNetLemmatizer()
    lemmas = [lemmatizer.lemmatize(word) for word in words[:10]]
    # De-duplicate while keeping order so the numbering is stable
    # between runs (a plain set would order the words arbitrarily).
    vocab = list(dict.fromkeys(lemmas))
    dictionary = PyDictionary()
    meanings = [dictionary.meaning(word) for word in vocab]

    sentences = sent_tokenize(summarize(text, ratio=0.25))

    # --- 3. Build the DOCX document --------------------------------------
    doc = Document()
    doc.add_heading('Notes for ' + title, 0)
    for sentence in sentences:
        doc.add_paragraph(sentence)
    doc.add_heading('Vocab Words from ' + title, 0)
    thesaurus = datamuse.Datamuse()
    for number, (word, meaning) in enumerate(zip(vocab, meanings), start=1):
        entry = doc.add_paragraph(str(number) + ') ')
        entry.add_run(word).bold = True
        entry.add_run(': ')
        # The lookup can come back empty (None) for unknown words; show
        # the word with a blank definition instead of crashing.
        definitions = list(meaning.values()) if meaning else []
        entry.add_run(str(definitions).replace('[', '').replace(']', ''))

        synonym_line = doc.add_paragraph('')
        synonym_line.add_run('Synonyms for ')
        synonym_line.add_run(word.upper() + ': ').bold = True
        related = thesaurus.words(ml=word, max=10)
        synonyms = [item['word'] for item in related if 'word' in item]
        synonym_line.add_run(', '.join(synonyms)).italic = True

    # Only ASCII letters of the title survive into the file name.
    whitelist = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    fileName = ''.join(ch for ch in title if ch in whitelist) + '.docx'
    doc.save(fileName)
    pdf_name = fileName.replace('.docx', '.pdf')

    # --- 4. Convert to PDF, record the note, render the page -------------
    configuration = cloudmersive_convert_api_client.Configuration()
    # NOTE(review): hard-coded credential; it should be loaded from
    # configuration or the environment rather than committed to source.
    configuration.api_key['Apikey'] = 'f0c513bc-8c00-4491-830e-3e83b015feb6'
    api_instance = cloudmersive_convert_api_client.ConvertDocumentApi(
        cloudmersive_convert_api_client.ApiClient(configuration))
    try:
        # Convert Word DOCX Document to PDF
        api_response = api_instance.convert_document_docx_to_pdf(fileName)
        with open('static/' + pdf_name, 'wb') as pdf_out:
            pdf_out.write(api_response)
    except ApiException as api_error:
        # Best effort, as before: the failure is reported and the note is
        # still recorded below even though no PDF was written.
        print(
            "Exception when calling ConvertDocumentApi->convert_document_docx_to_pdf: %s\n"
            % api_error)

    note = Note(noteFile=str(pdf_name), creator=current_user)
    db.session.add(note)
    db.session.commit()
    myFile = url_for('.static', filename=pdf_name)
    return render_template('notes.html', myFile=myFile)
Пример #3
0
#pdfdoc

# pip install cloudmersive-convert-api-client

from __future__ import print_function
import time
import cloudmersive_convert_api_client
from cloudmersive_convert_api_client.rest import ApiException
from pprint import pprint
# Authenticate against the Cloudmersive Convert API with an API key.
configuration = cloudmersive_convert_api_client.Configuration()
configuration.api_key['Apikey'] = 'YOUR_API_KEY'
# If the key needs a prefix (e.g. Bearer), set it like this:
# configuration.api_key_prefix['Apikey'] = 'Bearer'

# Build the document-conversion API on top of a configured client.
api_client = cloudmersive_convert_api_client.ApiClient(configuration)
api_instance = cloudmersive_convert_api_client.ConvertDocumentApi(api_client)

# Input file to perform the operation on.
input_file = 'ALL.pdf'

try:
    # Convert PDF to Word DOCX Document and dump the raw response.
    api_response = api_instance.convert_document_pdf_to_docx(input_file)
    pprint(api_response)
except ApiException as e:
    message = (
        "Exception when calling ConvertDocumentApi->convert_document_pdf_to_docx: %s\n"
        % e)
    print(message)
Пример #4
0
def notes():
    """Turn the article at the submitted link into downloadable notes.

    Steps, in order:
      1. Read the ``link`` form field and download/parse the article.
      2. Extract keywords from the article text, keep the ten longest
         words, lemmatize them and look up a definition for each.
      3. Summarize the text and write summary, vocabulary and synonyms
         into a DOCX document.
      4. Convert the DOCX to PDF through the Cloudmersive API, store it
         under ``static/``, record a ``Note`` row and render
         ``notes.html``.

    Returns:
        The result of ``render_template('notes.html', myFile=...)`` where
        ``myFile`` is the static URL of the generated PDF.
    """
    from PyDictionary import PyDictionary
    from summa import keywords
    from summa.summarizer import summarize
    from nltk.stem import WordNetLemmatizer
    from nltk.tokenize import sent_tokenize
    from newspaper import Article
    from docx import Document
    from datamuse import datamuse
    import cloudmersive_convert_api_client
    from cloudmersive_convert_api_client.rest import ApiException

    # --- 1. Fetch the article --------------------------------------------
    url = str(request.form['link'])
    article = Article(url)
    article.download()
    article.parse()
    text = article.text
    title = article.title

    # --- 2. Pick vocabulary words ----------------------------------------
    words = []
    for line in keywords.keywords(text).split('\n'):
        for word in line.split(' '):
            if word:  # an empty keyword result would yield '' entries
                words.append(word)
    # Longest words first; sort + reverse keeps the original tie order.
    words.sort(key=len)
    words.reverse()
    lemmatizer = WordNetLemmatizer()
    lemmas = [lemmatizer.lemmatize(word) for word in words[:10]]
    # De-duplicate while keeping order so the numbering is stable
    # between runs (a plain set would order the words arbitrarily).
    vocab = list(dict.fromkeys(lemmas))
    dictionary = PyDictionary()
    meanings = [dictionary.meaning(word) for word in vocab]

    sentences = sent_tokenize(summarize(text, ratio=0.25))

    # --- 3. Build the DOCX document --------------------------------------
    doc = Document()
    doc.add_heading('Notes for ' + title, 0)
    for sentence in sentences:
        doc.add_paragraph(sentence)
    doc.add_heading('Vocab Words from ' + title, 0)
    thesaurus = datamuse.Datamuse()
    for number, (word, meaning) in enumerate(zip(vocab, meanings), start=1):
        entry = doc.add_paragraph(str(number) + ') ')
        entry.add_run(word).bold = True
        entry.add_run(': ')
        # The lookup can come back empty (None) for unknown words; show
        # the word with a blank definition instead of crashing.
        definitions = list(meaning.values()) if meaning else []
        entry.add_run(str(definitions).replace('[', '').replace(']', ''))

        synonym_line = doc.add_paragraph('')
        synonym_line.add_run('Synonyms for ')
        synonym_line.add_run(word.upper() + ': ').bold = True
        related = thesaurus.words(ml=word, max=10)
        synonyms = [item['word'] for item in related if 'word' in item]
        synonym_line.add_run(', '.join(synonyms)).italic = True

    # Only ASCII letters of the title survive into the file name.
    whitelist = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    fileName = ''.join(ch for ch in title if ch in whitelist) + '.docx'
    doc.save(fileName)
    pdf_name = fileName.replace('.docx', '.pdf')

    # --- 4. Convert to PDF, record the note, render the page -------------
    configuration = cloudmersive_convert_api_client.Configuration()
    # NOTE(review): hard-coded credential; it should be loaded from
    # configuration or the environment rather than committed to source.
    configuration.api_key['Apikey'] = 'f0c513bc-8c00-4491-830e-3e83b015feb6'
    api_instance = cloudmersive_convert_api_client.ConvertDocumentApi(
        cloudmersive_convert_api_client.ApiClient(configuration))
    try:
        # Convert Word DOCX Document to PDF
        api_response = api_instance.convert_document_docx_to_pdf(fileName)
        with open('static/' + pdf_name, 'wb') as pdf_out:
            pdf_out.write(api_response)
    except ApiException as api_error:
        # Best effort, as before: the failure is reported and the note is
        # still recorded below even though no PDF was written.
        print(
            "Exception when calling ConvertDocumentApi->convert_document_docx_to_pdf: %s\n"
            % api_error)

    note = Note(noteFile=str(pdf_name), creator=current_user)
    db.session.add(note)
    db.session.commit()
    myFile = url_for('.static', filename=pdf_name)
    return render_template('notes.html', myFile=myFile)