Exemplo n.º 1
0
 def fasterSolution(self, b):
     """Fill a 1000x4 table with one picture per cell and save it as .docx.

     The picture is ``<self.b>.png`` inside ``self.folderAddress``; the
     result is written to the ``singleFolder`` sub-directory as
     ``<self.b>_1.docx``. The progress widget is refreshed with the number
     of seconds elapsed since ``self.start_time`` before the build, after
     the build and after the save.

     NOTE(review): the ``b`` argument is never read -- the file name comes
     from ``self.b``. Confirm that this is intended.
     """

     def _report_elapsed():
         # Progress value is the elapsed wall-clock time in seconds.
         self.progress['value'] = time.time() - self.start_time
         self.progress.update()

     _report_elapsed()
     self.document = Document()
     self.COLUMNS = 4
     self.ROWS = 1000
     self.table = self.document.add_table(rows=self.ROWS,
                                          cols=self.COLUMNS)
     # The flat cell list is read once and sliced per row below.
     self.table_cells = self.table._cells
     picture_path = self.folderAddress + "\\" + self.b + ".png"
     for row_index in range(self.ROWS):
         first = row_index * self.COLUMNS
         self.row_cells = self.table_cells[first:first + self.COLUMNS]
         for cell in self.row_cells:
             self.paragraph = cell.paragraphs[0]
             self.run = self.paragraph.add_run()
             self.run.add_picture(picture_path,
                                  width=350000 * 0.71,
                                  height=350000 * 0.49)
     _report_elapsed()
     self.document.save("\\".join(
         [self.folderAddress, "singleFolder", self.b + "_" + str(1) + ".docx"]))
     _report_elapsed()
Exemplo n.º 2
0
 def it_has_part_as_header_part(self):
     """Add a header containing a floating picture and save the document.

     NOTE(review): despite its name this performs no assertions; it only
     writes the resulting file to a hard-coded absolute path.
     """
     document = Document(dir_pkg_path)
     header_run = document.add_header().add_paragraph().add_run()
     header_run.add_floating_picture(python_powered_path)
     document.save('/home/daniel/faboozle.docx')
Exemplo n.º 3
0
    def it_removes_header_part(self):
        """After remove_headers() no header part or header reference is left."""
        document = Document(dir_pkg_path)
        document.remove_headers()

        # No related part may still carry the header content type.
        content_types = [
            part.content_type
            for _, part in document.part.related_parts.items()
        ]
        assert all(ctype != CT.WML_HEADER for ctype in content_types)

        # The body's section properties must hold no w:headerReference.
        sect_pr = document._body._body.get_or_add_sectPr()
        remaining = sect_pr.findall(qn('w:headerReference'))
        assert len(remaining) == 0
Exemplo n.º 4
0
    def it_removes_header_part(self):
        """Check that remove_headers() drops header parts and references."""
        document = Document(dir_pkg_path)
        document.remove_headers()

        related = document.part.related_parts
        for _rel_id, related_part in related.items():
            # Every surviving related part must be something other than a header.
            assert related_part.content_type != CT.WML_HEADER

        body_sect_pr = document._body._body.get_or_add_sectPr()
        header_refs = body_sect_pr.findall(qn('w:headerReference'))
        assert len(header_refs) == 0
Exemplo n.º 5
0
    def it_removes_footer_part(self):
        """After remove_footers() no footer part or footer reference is left."""
        document = Document(dir_pkg_path)
        document.remove_footers()

        # No related part may still carry the footer content type.
        content_types = [
            part.content_type
            for _, part in document.part.related_parts.items()
        ]
        assert all(ctype != CT.WML_FOOTER for ctype in content_types)

        # The body's section properties must hold no w:footerReference.
        sect_pr = document._body._body.get_or_add_sectPr()
        remaining = sect_pr.findall(qn("w:footerReference"))
        assert len(remaining) == 0
Exemplo n.º 6
0
def fasterSolution(folderAddress, pic):
    """Build a document holding a 1000x4 table with one picture per cell.

    Fixes over the previous version, which could not run:
    * ``add_table`` takes ``cols``, not ``columns``;
    * ``ROWS`` was referenced but never defined;
    * the per-row slice of cells is a plain list and has no ``.cells``.

    Args:
        folderAddress: Directory prefix of the picture. It is concatenated
            directly with ``pic``, so it must already end with a path
            separator.
        pic: Picture file name without the ``.png`` extension.

    Returns:
        The populated ``Document``. Previously the document was built and
        then discarded; returning it lets the caller save it.
    """
    COLUMNS = 4
    ROWS = 1000
    document = Document()
    table = document.add_table(rows=ROWS, cols=COLUMNS)
    # Read the flat cell list once and slice it per row.
    table_cells = table._cells
    picture_path = folderAddress + pic + ".png"
    for i in range(ROWS):
        row_cells = table_cells[i * COLUMNS:(i + 1) * COLUMNS]
        for cell in row_cells:
            paragraph = cell.paragraphs[0]
            run = paragraph.add_run()
            run.add_picture(picture_path,
                            width=350000 * 1.42,
                            height=700000 * 0.49)
    return document
Exemplo n.º 7
0
def finalizer(filed=None):
    """Merge the competency documents into ``static/merge/report.docx``.

    Starts from ``static/merge/nicopon.docx``, appends every paragraph of
    each input document (each preceded by a page break) through
    ``get_para_data``, adds the closing picture and saves the report.

    Args:
        filed: Optional iterable of extra document paths. Paths that are not
            already in the built-in list are appended after it. Defaults to
            no extra documents (``None`` replaces the former mutable ``[]``
            default).

    A document that fails to load or copy is reported on stdout and skipped;
    the merge is deliberately best-effort.
    """
    files = [
        "static/merge/planning and organising.docx",
        "static/merge/customer service.docx", "static/merge/leadership.docx",
        "static/merge/commercial awareness.docx",
        "static/merge/initiative.docx",
        "static/merge/persuasive oral communication.docx"
    ]
    output_doc = Document("static/merge/nicopon.docx")
    for extra in (() if filed is None else filed):
        if extra not in files:
            files.append(extra)
    for path in files:
        try:
            input_doc = Document(path)
            # The former ``if index < len(files)`` guard was always true, so
            # a page break precedes every merged document.
            output_doc.add_page_break()
            for position, para in enumerate(input_doc.paragraphs):
                get_para_data(output_doc, para, position)
        except Exception as e:
            # Best-effort merge: report the failure and move on.
            print("mhhh")
            print(e)
    p = output_doc.add_paragraph()
    r = p.add_run()
    r.add_picture('static/end.PNG', width=Inches(7.09), height=Inches(8.76))
    output_doc.save('static/merge/report.docx')
Exemplo n.º 8
0
    def it_adds_to_doc_without_header(self):
        """add_header() yields one header reference and an empty header."""
        document = Document(dir_pkg_path)

        header = document.add_header()
        sect_pr = document._body._body.get_or_add_sectPr()
        header_refs = sect_pr.findall(qn('w:headerReference'))
        assert len(header_refs) == 1

        # The new header is truthy and starts without paragraphs.
        assert header
        assert len(header.paragraphs) == 0

        # Adding a paragraph is reflected in the header's paragraph list.
        header.add_paragraph('foobar')
        assert len(header.paragraphs) == 1
Exemplo n.º 9
0
 def putIntoDocumentFiles(folderAddress, pic):
     """Fill a 100x4 table with ``<pic>.png`` and save it as ``<pic>_1.docx``.

     The picture is read from ``folderAddress``; the document is written to
     its ``singleFolder`` sub-directory.
     """
     document = Document()
     table = document.add_table(rows=100, cols=4)  # ROWS=25 for 100 barcodes
     picture_path = folderAddress + "\\" + pic + ".png"
     every_cell = (cell for row in table.rows for cell in row.cells)
     for cell in every_cell:
         picture_run = cell.paragraphs[0].add_run()
         # (width, height) => dimensions (single table row, single table
         # column) for a single page
         picture_run.add_picture(picture_path,
                                 width=350000 * 0.71,
                                 height=350000 * 0.49)
     target = folderAddress + "\\singleFolder" + "\\" + pic + "_" + "1" + ".docx"
     document.save(target)
Exemplo n.º 10
0
    def it_adds_to_doc_without_footer(self):
        """After removing footers, add_footer() yields one empty footer."""
        document = Document(dir_pkg_path)
        document.remove_footers()

        footer = document.add_footer()
        sect_pr = document._body._body.get_or_add_sectPr()
        footer_refs = sect_pr.findall(qn('w:footerReference'))
        assert len(footer_refs) == 1

        # The new footer is truthy and starts without paragraphs.
        assert footer
        assert len(footer.paragraphs) == 0

        # Adding a paragraph is reflected in the footer's paragraph list.
        footer.add_paragraph('foobar')
        assert len(footer.paragraphs) == 1
Exemplo n.º 11
0
    def it_adds_to_doc_without_footer(self):
        """Check add_footer() on a document whose footers were removed."""
        document = Document(dir_pkg_path)
        document.remove_footers()

        footer = document.add_footer()
        body_sect_pr = document._body._body.get_or_add_sectPr()
        reference_count = len(body_sect_pr.findall(qn("w:footerReference")))
        # Exactly one footer reference exists after add_footer().
        assert reference_count == 1

        assert footer
        assert len(footer.paragraphs) == 0

        footer.add_paragraph("foobar")
        paragraphs_after_add = footer.paragraphs
        assert len(paragraphs_after_add) == 1
Exemplo n.º 12
0
 def add_picture_fixture(self, request, run_, picture_):
     """Return ``(document, image_path_, width, height, run_, picture_)``.

     ``run_.add_picture`` is configured to return ``picture_``; width and
     height are fixed at 100 and 200.
     """
     width = 100
     height = 200
     document = Document()
     image_path_ = instance_mock(request, str, name='image_path_')
     # Presumably replaces docx.text.Run so that it produces run_ -- the
     # exact patching behaviour lives in the class_mock helper.
     class_mock(request, 'docx.text.Run', return_value=run_)
     run_.add_picture.return_value = picture_
     fixture = (document, image_path_, width, height, run_, picture_)
     return fixture
Exemplo n.º 13
0
        def get_grouplist(self, filename):
            """Return one column of the document's first table as a list.

            The first table row supplies the header names. For every later
            row the value stored under the header
            'Фамилия, имя, отчество ' (trailing space included) is collected;
            rows that lack that header contribute ``None``.
            """
            table = Document(filename).tables[0]

            rows = iter(table.rows)
            header_row = next(rows, None)
            if header_row is None:
                # A table without rows yields no entries.
                return []
            keys = tuple(cell.text for cell in header_row.cells)

            grouplist = []
            for row in rows:
                # Map header names to this row's cell texts.
                record = dict(zip(keys, (cell.text for cell in row.cells)))
                grouplist.append(record.get('Фамилия, имя, отчество '))
            return grouplist
Exemplo n.º 14
0
 def __init__(self, filename):
     """Remember ``filename``, open it as a Document, start an empty outlist."""
     self.filename = filename
     self.document = Document(filename)
     self.outlist = list()
Exemplo n.º 15
0
def read_table(docv):
    """Read the second table of ``docv`` into a list of row dictionaries.

    Rows with 4, 3 or 2 cells are keyed by
    ("number", "title", "comments", "no"), ("number", "title", "comments")
    or ("title", "comments") respectively. Four cells of the first table
    (row 0 cells 1 and 3, row 1 cells 1 and 3) are passed on: the first goes
    to ``namer`` and all four are stored in ``session["inf"]``.

    Returns:
        The list of per-row dictionaries from the second table.
    """
    document = Document(docv)
    table = document.tables[1]
    table_info = document.tables[0]

    info = [[cell.text for cell in row.cells] for row in table_info.rows]

    layouts = {
        4: ("number", "title", "comments", "no"),
        3: ("number", "title", "comments"),
        2: ("title", "comments"),
    }
    data = []
    keys = None
    for row in table.rows:
        values = [cell.text for cell in row.cells]
        # A row of any other width keeps the previous row's keys (or None
        # on the first row, which makes zip() raise TypeError).
        keys = layouts.get(len(values), keys)
        data.append(dict(zip(keys, values)))

    info = [info[0][1], info[0][3], info[1][1], info[1][3]]
    namer(info[0])
    session["inf"] = info
    # info: candidate, exercise, assessor, date -- would be good if stored
    # in sessions
    return data
Exemplo n.º 16
0
def parse():
    """Collect "label: value" fields from .docx files into ``output.xlsx``.

    The directory is read from ``ent_path``. For each ``.docx`` file that
    contains at least one table, the paragraphs of the first table are
    scanned:

    * text ending in ``:`` opens a new, empty field whose value is taken
      from the following colon-free paragraphs;
    * text containing a colon elsewhere contributes the part between the
      first and second colon as a new field;
    * other text is appended to the currently open field, if any.

    One spreadsheet row is written per document. The column names come from
    ``keys``, which is not defined in this function and must be provided by
    the enclosing module.

    Fix: the directory listing now uses the selected directory. The bare
    ``os.listdir()`` enumerated the current working directory, so files were
    only found when both happened to coincide. An empty entry still means
    the current directory.
    """
    data = []
    dirpath = ent_path.get()
    for name in os.listdir(dirpath or '.'):
        full_path = os.path.join(dirpath, name)
        if not (os.path.isfile(full_path) and name.endswith(".docx")):
            continue
        document = Document(full_path)
        if len(document.tables) == 0:
            continue
        fields = []
        data.append(fields)
        # True while a trailing-colon label is waiting for its value text.
        awaiting_value = False
        for row in document.tables[0].rows:
            for cell in iter_unique_cells(row):
                for para in cell.paragraphs:
                    text = para.text.strip()
                    if not text:
                        continue
                    if text.endswith(':'):
                        fields.append('')
                        awaiting_value = True
                        continue
                    parts = text.split(':')
                    if len(parts) > 1:
                        fields.append(parts[1])
                        awaiting_value = False
                    elif awaiting_value:
                        fields[-1] += text
    df = pd.DataFrame(data, columns=keys)
    df.to_excel("output.xlsx", index=False)
Exemplo n.º 17
0
    def it_has_rel_as_footer_rel(self):
        """The document part has at least one relationship of type FOOTER."""
        document = Document(dir_pkg_path)
        footer_rel_exists = any(
            rel.reltype == RT.FOOTER
            for _, rel in document.part.rels.items()
        )
        assert footer_rel_exists
Exemplo n.º 18
0
    def it_has_rel_as_header_rel(self):
        """The document part has at least one relationship of type HEADER."""
        document = Document(dir_pkg_path)
        header_rel_exists = any(
            rel.reltype == RT.HEADER
            for _, rel in document.part.rels.items()
        )
        assert header_rel_exists
Exemplo n.º 19
0
def readDocxParagraph(name):
    """Return the lower-cased text of all body paragraphs of a .docx file.

    Every paragraph's text is followed by the marker " endpara " so that
    paragraph boundaries survive in the flattened string. A document without
    paragraphs yields an empty string.

    The text is assembled with a single join instead of repeated ``+=`` on a
    variable named ``str``, which shadowed the builtin and copied the
    growing string on every paragraph.
    """
    document = Document(name)
    pieces = [para.text + u" endpara " for para in document.paragraphs]
    return "".join(pieces).lower()
Exemplo n.º 20
0
 def add_picture_fixture(self, request, Document_inline_shapes_,
                         inline_shapes_):
     """Build the add-picture fixture tuple from ``request.param``.

     ``request.param`` supplies (width, height, expected_width,
     expected_height). The picture returned by
     ``inline_shapes_.add_picture`` is given a width of 200 and a height of
     100.
     """
     width, height, expected_width, expected_height = request.param
     document = Document()
     image_path_ = instance_mock(request, str, name='image_path_')
     picture_ = inline_shapes_.add_picture.return_value
     picture_.width = 200
     picture_.height = 100
     fixture = (document, image_path_, width, height, inline_shapes_,
                expected_width, expected_height, picture_)
     return fixture
Exemplo n.º 21
0
def file_tables(f,filename): # returns tables in document
    """Return the tables of the document ``f`` as a new list.

    ``filename`` is accepted but not used by this function.
    """
    document = Document(f)  # currently only supports docx files
    return list(document.tables)
Exemplo n.º 22
0
    def it_has_part_as_header_part(self):
        """A header part exists and every header part is an XmlPart."""
        document = Document(dir_pkg_path)
        header_parts = [
            part
            for _, part in document.part.related_parts.items()
            if part.content_type == CT.WML_HEADER
        ]
        for header_part in header_parts:
            assert isinstance(header_part, XmlPart)

        # At least one related part must have the header content type.
        assert header_parts
Exemplo n.º 23
0
def convert_table_to_df(document_name, table_nos=None):
    """Convert tables of a .docx document with ``table_to_df``.

    Args:
        document_name: Path or file-like object accepted by ``Document``.
        table_nos: Indexes of the tables to convert. When omitted (``None``)
            or given as ``''``, every table in the document is converted.

    Returns:
        A list with one ``table_to_df`` result per selected table, in the
        order given by ``table_nos``.

    Fix: the former default was a mutable ``[]`` that never matched the
    ``== ''`` "all tables" check, so calling the function without
    ``table_nos`` silently returned an empty list. An explicitly passed
    empty list still selects no tables.
    """
    document = Document(document_name)
    if table_nos is None or table_nos == '':
        table_nos = range(len(document.tables))
    return [table_to_df(document.tables[index]) for index in table_nos]
Exemplo n.º 24
0
 def __init__(self, context, request=None):
     """Load the docx template and set up the state used to compile it.

     Stores ``context`` and ``request``, opens ``self._template_filename``
     as the template document, resolves the tool type of the context and
     looks up the ``IToolTypesInfo`` utility. Both feature flags start as
     ``False``.
     """
     self.context, self.request = context, request
     self.template = Document(self._template_filename)
     self.tool_type = get_tool_type(self.context)
     self.tti = getUtility(IToolTypesInfo)
     # Feature flags are disabled by default.
     self.use_existing_measures = False
     self.italy_special = False
Exemplo n.º 25
0
def readDocxTable(name):
    """Return the lower-cased text of every table cell of a .docx file.

    Tables, rows, cells and cell paragraphs are visited in document order;
    each paragraph's text is followed by the marker " endpara ". A document
    without tables yields an empty string.

    The text is assembled with a single join instead of repeated ``+=`` on a
    variable named ``str``, which shadowed the builtin and copied the
    growing string on every paragraph.
    """
    document = Document(name)
    pieces = [
        paragraph.text + u" endpara "
        for table in document.tables
        for row in table.rows
        for cell in row.cells
        for paragraph in cell.paragraphs
    ]
    return "".join(pieces).lower()
Exemplo n.º 26
0
def import_data(filename):
    """Load tab-separated rows from a .docx file into ``input.db``.

    The ``input`` table is created when missing. Rows are imported only when
    the table is empty. The first paragraph of the document is not imported;
    if it is empty nothing is imported at all. Each following paragraph is
    split on runs of tabs into six columns and inserted, until the first
    empty paragraph or the end of the document. Every row is committed
    individually, as before.

    Fixes:
    * The document is parsed once instead of once per imported row.
    * The loop stops cleanly at the terminating empty paragraph or at the
      end of the document. Previously the paragraph was inserted before
      being checked, so every import ended in ``IndexError``.
    * The connection is closed even when an insert fails.

    Raises:
        IndexError: If a non-empty paragraph has fewer than six columns.
    """
    conn = lite.connect("input.db")
    try:
        cur = conn.cursor()
        cur.execute(
            "create table IF NOT EXISTS input('id' integer, 'description' text, 'datetime' integer, 'longitute' real, 'latitude' real, 'elevation' integer)"
        )
        cur.execute("select count(*) from input")
        existing_rows = cur.fetchone()[0]
        if existing_rows == 0:
            paragraphs = Document(filename).paragraphs
            # The first paragraph is only checked for being non-empty; it is
            # never inserted itself.
            if paragraphs and paragraphs[0].text:
                for paragraph in paragraphs[1:]:
                    if not paragraph.text:
                        break
                    col = re.split(r'\t+', paragraph.text)
                    cur.execute(
                        "insert into input values(?,?,datetime(?),?,?,?)",
                        (col[0], col[1], col[2], col[3], col[4], col[5]))
                    conn.commit()
    finally:
        conn.close()
Exemplo n.º 27
0
 def __init__(self, context, request=None):
     """Load the docx template and set up the state used to compile it.

     Stores ``context`` and ``request``, opens ``self._template_filename``
     as the template document, reads the
     "euphorie.use_existing_measures" registry record (default ``False``),
     resolves the tool type of the current survey, looks up the
     ``IToolTypesInfo`` utility and flags whether the country is "it".
     """
     self.context, self.request = context, request
     self.template = Document(self._template_filename)
     registry_record = "euphorie.use_existing_measures"
     self.use_existing_measures = api.portal.get_registry_record(
         registry_record, default=False)
     self.tool_type = get_tool_type(self.webhelpers._survey)
     self.tti = getUtility(IToolTypesInfo)
     country = self.webhelpers.country
     self.italy_special = country == "it"
Exemplo n.º 28
0
def word_file_data(fileLocation):
    """Return the non-empty first-column texts of the document's first table.

    The resulting list is also printed to stdout.
    """
    first_table = Document(fileLocation).tables[0]
    first_column = (rw.cells[0].text for rw in first_table.rows)
    fileContentList = [value for value in first_column if value != '']
    print(fileContentList)
    return fileContentList
Exemplo n.º 29
0
def parse_doc():
    """Extract name, mobile number, e-mail and skills from an uploaded .docx.

    Handles the upload posted under the form field ``img``. For a ``docx``
    upload the paragraph text is joined with spaces and passed to the ``ex``
    extractors; each result is logged and stored in the session under
    ``name_session``, ``mob_session``, ``mail_session`` and
    ``skills_session``. When any of the four is missing, a second text
    extraction of the upload is used as a fallback, and missing skills are
    looked up through the LinkedIn URL found in that text. The fallback
    results are only printed, not stored in the session.

    Returns:
        "Skills" for every POST request (including non-docx uploads) and
        "File Not Uploaded" for any other method.

    Fix: the extension is now taken after the LAST dot of the uploaded file
    name. ``split('.')[1]` misread names such as ``cv.final.docx`` and raised
    ``IndexError`` for names without a dot.

    NOTE(review): the same upload object is handed to ``Document`` and later
    to ``ex.txt_extraction`` -- confirm that the second read does not depend
    on the stream position left by the first.
    """
    if request.method != 'POST':
        return "File Not Uploaded"

    filename = request.files['img']
    logging.getLogger().setLevel(logging.INFO)
    logging.info('File Name {}'.format(filename))
    # Everything after the last dot; empty when the name has no dot at all.
    _, dot, extension = (filename.filename or '').rpartition('.')
    fileextens = extension if dot else ''
    print(fileextens)
    if fileextens == 'docx':
        logging.info('File name ends with docx')
        doc = Document(filename)
        full_t = ' '.join(para.text for para in doc.paragraphs)

        name_e = ex.name_extraction(full_t)
        session['name_session'] = name_e
        logging.info('Extracted Name {}'.format(name_e))

        mob = ex.extract_mob_number(full_t)
        session['mob_session'] = mob
        logging.info('Extracted Mobile  {}'.format(mob))

        mail = ex.extract_mail(full_t)
        session['mail_session'] = mail
        logging.info('Extracted mail {}'.format(mail))
        print(session.get('mail_session'))

        skills = ex.extract_skills(doc)
        logging.info('Extracted Skills {}'.format(skills))
        session['skills_session'] = skills

        if not mail or not name_e or not mob or not skills:
            # Fallback: retry the missing fields on the alternative text
            # extraction of the upload.
            txt_doc = ex.txt_extraction(filename)
            if not mob:
                mob_doc = ex.extract_mob_number(txt_doc)
                print(mob_doc)
            if not mail:
                mail_doc = ex.extract_mail(txt_doc)
                print(mail_doc)
            if not name_e:
                name_doc = ex.name_extraction(txt_doc)
                print(name_doc)
            if not skills:
                url = ex.extract_linkedinurl(txt_doc)
                skills = linkedin.skills_linkdn(url)
                print(skills)
    print("skills")
    return "Skills"
Exemplo n.º 30
0
def parse_all_docxs_table():
    """Export every table of the basket-elective documents to .xlsx files.

    For each file in ``./src/data/basket_elective_docxs/`` and each table in
    it:

    * the first cell of row 0 names the workbook (spaces, slashes and colons
      become underscores; written below ``./src/tmp/baskets/``);
    * row 1 supplies the column keys;
    * every later row becomes one record.

    The workbook gets the header row 'Course code', 'Course name',
    'L-T-P-C' followed by one line per record, with a space inserted after
    the first two characters of the course code.

    Changes:
    * Removed the ``str(os.path.isfile(fname)) == False`` branch. It compared
      a string with a bool, so it could never run.
    * Removed the xlwt placeholder workbook that was written to the same
      path and then immediately overwritten by the openpyxl workbook.
    * Tables and records are iterated directly instead of by index.

    Raises:
        KeyError: If a record lacks one of the three expected column keys.
    """
    path = './src/data/basket_elective_docxs/'
    for filename in os.listdir(path):
        document = Document(path + filename)
        for table in document.tables:
            data = []
            keys = None
            fname = ""
            for i, row in enumerate(table.rows):
                text = tuple(cell.text for cell in row.cells)
                if i == 0:
                    # Title row: derive the output file name.
                    fname = text[0] + '.xlsx'
                    for unsafe in (" ", "/", ":"):
                        fname = fname.replace(unsafe, "_")
                    fname = './src/tmp/baskets/' + fname
                elif i == 1:
                    # Header row: column names for the records below.
                    keys = text
                else:
                    data.append(dict(zip(keys, text)))

            wb = openpyxl.Workbook()
            sheet = wb.active
            sheet.cell(row=1, column=1).value = 'Course code'
            sheet.cell(row=1, column=2).value = 'Course name'
            sheet.cell(row=1, column=3).value = 'L-T-P-C'
            for row_number, record in enumerate(data, start=2):
                code = record['Course code']
                sheet.cell(row=row_number,
                           column=1).value = code[:2] + " " + code[2:]
                sheet.cell(row=row_number,
                           column=2).value = record['Course name']
                sheet.cell(row=row_number, column=3).value = record['L-T-P-C']
            wb.save(fname)
0
 def fasterSolution(folderAddress, pic, progress):
     """Fill a 1000x4 table with ``<pic>.png`` and save it as ``<pic>_1.docx``.

     The picture is read from ``folderAddress`` and the document is written
     to its ``singleFolder`` sub-directory. ``progress`` is refreshed with
     the seconds elapsed since the module-level ``start_time`` before the
     build, after the build and after the save.
     """

     def _report_elapsed():
         # Progress value is the elapsed wall-clock time in seconds.
         progress['value'] = time.time() - start_time
         progress.update()

     _report_elapsed()
     document = Document()
     COLUMNS = 4
     ROWS = 1000
     table = document.add_table(rows=ROWS, cols=COLUMNS)
     # The flat cell list is read once and sliced per row below.
     all_cells = table._cells
     picture_path = folderAddress + "\\" + pic + ".png"
     for row_index in range(ROWS):
         first = row_index * COLUMNS
         for cell in all_cells[first:first + COLUMNS]:
             picture_run = cell.paragraphs[0].add_run()
             picture_run.add_picture(picture_path,
                                     width=350000 * 0.71,
                                     height=350000 * 0.49)
     _report_elapsed()
     document.save("\\".join(
         [folderAddress, "singleFolder", pic + "_" + str(1) + ".docx"]))
     _report_elapsed()
Exemplo n.º 32
0
    def it_adds_to_doc_without_header(self):
        """add_header() on the first section yields one empty header."""
        document = Document(dir_pkg_path)

        first_section = document.sections[0]
        header = first_section.add_header()
        header_refs = first_section.findall(qn('w:headerReference'))
        assert len(header_refs) == 1

        # The new header is truthy and starts without paragraphs.
        assert header
        assert len(header.paragraphs) == 0

        # Adding a paragraph is reflected in the header's paragraph list.
        header.add_paragraph('foobar')
        assert len(header.paragraphs) == 1
Exemplo n.º 33
0
def parse_summary(assignment, file):
    """Build a summary line from a course code and the assignment text.

    The course code is the first match of three capital letters, a space
    and four digits at the start of any paragraph of ``file``; it is empty
    when no paragraph matches. Newlines in ``assignment`` are replaced by
    ", ".

    Returns:
        "<course code> <assignment>", or 'N/A' when ``assignment`` is empty
        or otherwise falsy.
    """
    document = Document(file)
    regex = re.compile("[A-Z][A-Z][A-Z] [0-9][0-9][0-9][0-9]")
    matches = (regex.match(paragraph.text) for paragraph in document.paragraphs)
    # First successful match wins; '' when no paragraph starts with a code.
    course_title = next((found.group() for found in matches if found), '')
    if not assignment:
        return 'N/A'
    return course_title + ' ' + assignment.replace('\n', ', ')
Exemplo n.º 34
0
    def getTable(filename, n=0):
        """Print table ``n`` of the document as a list of row dictionaries.

        The first table row supplies the keys; each later row becomes one
        dictionary mapping those keys to its cell texts. Nothing is
        returned.
        """
        from docx.api import Document
        table = Document(filename).tables[n]
        rows = iter(table.rows)
        header_row = next(rows, None)
        data = []
        if header_row is not None:
            keys = tuple(cell.text for cell in header_row.cells)
            data = [
                dict(zip(keys, (cell.text for cell in row.cells)))
                for row in rows
            ]
        print(data)
Exemplo n.º 35
0
def read_table(docv):
    """Read the second table of ``docv`` into a list of row dictionaries.

    Every row, including the first, is mapped onto the fixed keys
    ("number", "title", "comments", "no"); the table's own first row is not
    used as a header.
    """
    keys = ("number", "title", "comments", "no")
    table = Document(docv).tables[1]
    return [
        dict(zip(keys, (cell.text for cell in row.cells)))
        for row in table.rows
    ]
Exemplo n.º 36
0
 def it_should_raise_if_not_a_Word_file(self, Package_, package_, docx_):
     """Document._open() raises ValueError for a non-Word main document."""
     # Give the package's main document a content type that is not a Word one.
     package_.main_document.content_type = 'foobar'
     with pytest.raises(ValueError):
         Document._open(docx_)
Exemplo n.º 37
0
 def it_opens_default_template_if_no_file_provided(
         self, Package_, default_docx_):
     """Document._open(None) opens the package at the default docx path."""
     Document._open(None)
     # Package.open must have been called exactly once, with the default path.
     Package_.open.assert_called_once_with(default_docx_)
Exemplo n.º 38
0
 def it_can_open_a_docx_file(self, open_fixture):
     """Document._open() opens the package and returns its parts.

     Verifies that ``Package.open`` is called once with the given path and
     that the returned pair is the fixture's document part and package.

     Fix: the document part was compared with itself
     (``document_part is document_part``), which can never fail. It is now
     compared with the fixture's ``document_part_``.
     """
     docx_, Package_, package_, document_part_ = open_fixture
     document_part, package = Document._open(docx_)
     Package_.open.assert_called_once_with(docx_)
     assert document_part is document_part_
     assert package is package_