def cell_with_paragraphs(self):
    """Return a ``_Cell`` wrapping an ``a_tc()`` element with two ``a_p()`` children."""
    tc_builder = a_tc().with_nsdecls()
    tc_builder = tc_builder.with_child(a_p())
    tc_builder = tc_builder.with_child(a_p())
    return _Cell(tc_builder.element)
def add_table_fixture(self, request):
    """Return ``(cell, expected_xml)`` built from the two values in ``request.param``.

    ``request.param`` is unpacked as ``(tc_cxml, after_tc_cxml)``; the cell
    wraps the element parsed from the first, and the expected XML is rendered
    from the second with the table boilerplate appended.
    """
    tc_cxml, after_tc_cxml = request.param
    # The table has some overhead elements, and a blank paragraph follows it
    # because it sits inside a cell.
    table_overhead_cxml = '/(w:tblPr/w:tblW{w:type=auto,w:w=0},w:tblGrid),w:p)'
    full_after_cxml = after_tc_cxml + table_overhead_cxml
    cell = _Cell(element(tc_cxml), None)
    return cell, xml(full_after_cxml)
def add_table_fixture(self, request):
    """Build the cell under test and the XML expected for the add-table case.

    The first item of ``request.param`` is the cxml for the starting cell; the
    second is the cxml prefix of the expected result, to which the table's
    fixed child elements and trailing paragraph are appended here.
    """
    start_cxml, expected_prefix_cxml = request.param
    # A table carries some overhead elements, and a blank paragraph comes
    # after it since it lives in a cell.
    expected_cxml = ''.join(
        (expected_prefix_cxml, '/(w:tblPr/w:tblW{w:type=auto,w:w=0},w:tblGrid),w:p)')
    )
    cell = _Cell(element(start_cxml), None)
    expected_xml = xml(expected_cxml)
    return cell, expected_xml
def __extract_table(table):
    """Extract the cell texts of ``table`` as a list of rows.

    Each ``tc`` element contributes ``tc.grid_span`` entries to its row:

    * a vertically merged continuation cell (``vMerge == ST_Merge.CONTINUE``)
      repeats the value found in the same column of the previous row;
    * the extra columns of a horizontally spanned cell repeat the value just
      appended for that cell;
    * otherwise the cell's text is used, with newlines replaced by spaces and
      encoded as UTF-8.

    Returns:
        A list with one list per ``tr`` element of ``table._tbl``.

    Raises:
        IndexError: if a continuation cell has no value above it (first row,
            or a shorter previous row).
    """
    results = []
    for tr in table._tbl.tr_lst:
        row = []
        for tc in tr.tc_lst:
            for grid_span_idx in range(tc.grid_span):
                if tc.vMerge == ST_Merge.CONTINUE:
                    # The slot being filled has index len(row); copy the value
                    # directly above it, not the one above-left.
                    row.append(results[-1][len(row)])
                elif grid_span_idx > 0:
                    row.append(row[-1])
                else:
                    cell = _Cell(tc, table)
                    row.append(cell.text.replace('\n', ' ').encode('utf8'))
        results.append(row)
    return results
def cell_text_fixture(self):
    """Return ``(cell, text, expected_xml)`` for the cell-text case.

    The cell wraps an ``a_tc()`` element whose children are, in order,
    ``a_tcPr()``, ``a_p()``, ``a_tbl()`` and ``a_p()``. The expected XML is an
    ``a_tc()`` holding ``a_tcPr()`` and a single paragraph with one run whose
    text element contains ``text``.
    """
    text = 'foobar'

    # Starting cell: properties, paragraph, nested table, paragraph.
    start_builder = a_tc().with_nsdecls()
    start_builder = start_builder.with_child(a_tcPr())
    start_builder = start_builder.with_child(a_p())
    start_builder = start_builder.with_child(a_tbl())
    start_builder = start_builder.with_child(a_p())
    cell = _Cell(start_builder.element)

    # Expected result: properties plus one paragraph carrying the text.
    expected_builder = a_tc().with_nsdecls().with_child(a_tcPr())
    paragraph_builder = a_p().with_child(
        an_r().with_child(a_t().with_text(text))
    )
    expected_xml = expected_builder.with_child(paragraph_builder).xml()

    return cell, text, expected_xml
def table_print(b):
    """Translate the text of every non-empty paragraph in table ``b`` in place.

    Each paragraph's text is passed to ``Translator().translate(..., dest='hi')``
    and replaced by the result's ``.text``.

    The first pass walks ``b.rows`` / ``row.cells`` / ``cell.paragraphs``. If
    anything in that pass raises, a second pass starts over from
    ``b._tbl.iter_tcs()`` and translates paragraph by paragraph on a
    best-effort basis, skipping any paragraph whose translation fails.
    Paragraphs already rewritten by the first pass are visited again.
    """
    try:
        for row in b.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    if paragraph.text:
                        translator = Translator()
                        tran = translator.translate(paragraph.text, dest='hi')
                        paragraph.text = tran.text
    except Exception:
        # Fallback: walk the underlying tc elements directly.
        for tc in b._tbl.iter_tcs():
            cell = _Cell(tc, b)
            for b_tc in iter_block_items(cell):
                if not isinstance(b_tc, Paragraph) or not b_tc.text:
                    continue
                try:
                    translator = Translator()
                    tran = translator.translate(b_tc.text, dest='hi')
                    b_tc.text = tran.text
                    print("_______Trasnlating______\n")
                except Exception:
                    # Best effort: leave this paragraph untranslated, but let
                    # KeyboardInterrupt / SystemExit propagate.
                    pass
def tables_fixture(self, request):
    """Return a cell parsed from ``request.param[0]`` and the expected count.

    ``request.param`` must unpack to exactly ``(cell_cxml, expected_count)``.
    """
    cxml, count = request.param
    tc = element(cxml)
    return _Cell(tc, None), count
def width_get_fixture(self, request):
    """Return ``(cell, expected_width)`` from the two values in ``request.param``.

    The cell wraps the element parsed from the first value; the second is
    passed through unchanged.
    """
    cxml, width = request.param
    tc = element(cxml)
    return _Cell(tc, None), width
def merge_fixture(self, tc_, tc_2_, parent_, merged_tc_):
    """Return two cells sharing ``parent_`` plus the element ``tc_.merge`` returns.

    ``tc_.merge.return_value`` is set to ``merged_tc_`` after both cells are
    constructed.
    """
    cell = _Cell(tc_, parent_)
    other_cell = _Cell(tc_2_, parent_)
    tc_.merge.return_value = merged_tc_
    return cell, other_cell, merged_tc_
def paragraphs_fixture(self):
    """Return a parentless ``_Cell`` built from the cxml ``w:tc/(w:p, w:p)``."""
    tc = element('w:tc/(w:p, w:p)')
    cell = _Cell(tc, None)
    return cell
def add_paragraph_fixture(self, request):
    """Return ``(cell, expected_xml)`` from the cxml pair in ``request.param``.

    The first cxml string becomes the cell's element; the second is rendered
    to XML with ``xml()``.
    """
    before_cxml, after_cxml = request.param
    cell = _Cell(element(before_cxml), None)
    return cell, xml(after_cxml)
def alignment_set_fixture(self, request):
    """Return ``(cell, new_value, expected_xml)`` from ``request.param``.

    ``request.param`` unpacks to the starting cxml, the value to assign, and
    the cxml of the expected result (rendered here with ``xml()``).
    """
    start_cxml, value, after_cxml = request.param
    tc = element(start_cxml)
    cell = _Cell(tc, None)
    return cell, value, xml(after_cxml)
def width_set_fixture(self, request):
    """Return ``(cell, new_value, expected_xml)`` for the width-setter case.

    The three values come from ``request.param``: the cell's starting cxml,
    the value to assign, and the cxml rendered into the expected XML.
    """
    start_cxml, value, after_cxml = request.param
    tc = element(start_cxml)
    cell = _Cell(tc, None)
    return cell, value, xml(after_cxml)
def add_table_fixture(self, request):
    """Return a single-paragraph cell and the second ``'new-tbl'`` snippet.

    ``request`` is accepted but not used by this variant.
    """
    tc = element('w:tc/w:p')
    cell = _Cell(tc, None)
    snippets = snippet_seq('new-tbl')
    return cell, snippets[1]
def alignment_get_fixture(self, request):
    """Return ``(cell, expected_value)`` from the two values in ``request.param``.

    The cell wraps the element parsed from the first value; the second is
    returned as-is.
    """
    cxml, value = request.param
    tc = element(cxml)
    return _Cell(tc, None), value
def _highlight_run(run, text, un_match_tokens):
    """Highlight ``run`` and consume ``text`` from ``un_match_tokens`` if listed.

    Text that is not whitespace-only gets highlight colour 4. Text found in
    ``un_match_tokens`` gets colour 7 instead, and its first occurrence is
    removed from the list so each token is matched at most once.
    """
    # NOTE(review): 4 and 7 are presumably WD_COLOR_INDEX values
    # (bright green / yellow) -- confirm against python-docx.
    if not text.isspace():
        run.font.highlight_color = 4
    if un_match_tokens is not None and text in un_match_tokens:
        run.font.highlight_color = 7
        un_match_tokens.remove(text)


def _rewrite_nested_table(tbl, un_match_tokens):
    """Re-add the text of each nested-table cell as highlighted runs.

    For every cell of ``tbl``, the runs of each non-blank paragraph are
    emptied, then each collected text is appended as a new run to the cell's
    last paragraph. The paragraph elements themselves stay in the cell.
    """
    for row in tbl.tr_lst:
        for tc in row.tc_lst:
            texts = []
            # Snapshot the block items before touching their runs.
            for block in list(tc.iter_block_items()):
                if not isinstance(block, CT_P):
                    # Anything that is not a paragraph is skipped.
                    continue
                text = ''.join(r.text for r in block.r_lst)
                if not text or text.isspace():
                    continue
                texts.append(text)
                for r in block.r_lst:
                    r.text = ''
            # Add the text back into the nested cell and colour it.
            for text in texts:
                cell = _Cell(tc, tbl)
                run = cell.paragraphs[-1].add_run(text)
                run.font.name = '宋体'
                run.font.size = 140000
                _highlight_run(run, text, un_match_tokens)


def pre_story(input_file, un_match_tokens=None):
    """Write a highlighted copy of a Word document and return its path.

    Every non-blank paragraph in table cells, every paragraph inside tables
    nested one level deep, and every body paragraph is rewritten as a
    highlighted run. Runs whose text equals one of the unmatched tokens get a
    different highlight colour, and that token is then consumed.

    Args:
        input_file: Path of the document to open with ``docx.Document``.
        un_match_tokens: Tokens joined by the separator ``'$zl$'``, or
            ``None`` when there are no tokens to flag.

    Returns:
        The path of the saved copy (``<name>_new<ext>``), or ``input_file``
        unchanged if any step raised.
    """
    import logging
    import os

    # None is the documented default; only split when a string was given.
    if un_match_tokens is not None:
        un_match_tokens = un_match_tokens.split('$zl$')
    try:
        root, ext = os.path.splitext(input_file)
        result_file = root + '_new' + ext
        word_obj = docx.Document(input_file)
        for cur_table in word_obj.tables:
            for row_idx in range(len(cur_table.rows)):
                for col_idx in range(len(cur_table.columns)):
                    # Fetch the current cell.
                    cur_cell = cur_table.cell(row_idx, col_idx)
                    tc = cur_cell._tc
                    cell_texts = []
                    # Walk the children backwards so deleting one does not
                    # shift the indices still to be visited.
                    for child_idx in range(len(tc) - 1, -1, -1):
                        child = tc[child_idx]
                        if isinstance(child, CT_P):
                            # Blank paragraphs are kept; non-blank ones are
                            # removed and re-added below.
                            text = ''.join(r.text for r in child.r_lst)
                            if text and not text.isspace():
                                cell_texts.insert(0, text)
                                del tc[child_idx]
                        elif isinstance(child, CT_Tbl):
                            _rewrite_nested_table(child, un_match_tokens)
                        # Any other child is left untouched.
                    # NOTE(review): texts are re-added once per cell, after
                    # all children were scanned -- confirm intended nesting.
                    for text_idx, text in enumerate(cell_texts):
                        if text_idx == 0:
                            run = cur_cell.add_paragraph().add_run(text)
                        else:
                            run = cur_cell.paragraphs[-1].add_run(text)
                        run.font.name = '宋体'
                        run.font.size = 140001
                        _highlight_run(run, text, un_match_tokens)
        for cur_p in word_obj.paragraphs:
            cur_p_text = cur_p.text
            cur_p.text = ''
            run = cur_p.add_run(cur_p_text)
            _highlight_run(run, cur_p_text, un_match_tokens)
        word_obj.save(result_file)
    except Exception:
        # Best effort: report the failure and hand back the untouched input.
        logging.getLogger(__name__).exception(
            "pre_story failed for %r; returning the input path", input_file
        )
        result_file = input_file
    return result_file