def stickers_transform():
    """Transform the CSV data into a DOCX file of 2x2 sticker tables.

    Reads master9.7.csv, writes four stickers per table into a copy of
    landscape.docx and saves the result as labels.docx.
    """
    document = Document('landscape.docx')
    # Plain 'r' relies on default universal-newline handling;
    # the original 'rU' mode was removed in Python 3.11.
    with open('master9.7.csv', 'r') as infile:
        reader = csv.reader(infile)
        table = document.add_table(2, 2)
        i = 0
        for row in reader:
            # divmod keeps the indices integral: the original `i / 2`
            # yields a float on Python 3 and breaks table.cell().
            index1, index2 = divmod(i, 2)
            cell = table.cell(index1, index2)
            write_line_in_cell(cell, '\n', 20)
            write_line_in_cell(cell, row[1])
            write_line_in_cell(cell, row[2])
            write_line_in_cell(cell, row[0])
            write_line_in_cell(cell, row[3])
            write_line_in_cell(cell, '\n', 20)
            i += 1
            if i % 4 == 0:
                # Current 2x2 table is full: start a fresh page of four stickers
                table = document.add_table(2, 2)
                i = 0
    document.save('labels.docx')
def scan_file(path, folder, c):
    """Scan a DOCX question bank and insert each table row into `domande`.

    path   -- path of the .docx file to read
    folder -- category of the file: 'generale', 'sunto' or 'comprensione'
    c      -- open DB-API cursor used for the INSERTs
    """
    print(path + "\n")
    document = Document(path)
    for table in document.tables:
        if folder in ("generale", "sunto"):
            # The two branches were identical copy-paste; merged.
            # Row layout: codice|question|a|b|c|d|answer
            for row in table.rows:
                print("add " + row.cells[0].text)
                insert = (row.cells[0].text, row.cells[1].text, row.cells[2].text,
                          row.cells[3].text, row.cells[4].text, row.cells[5].text,
                          row.cells[6].text, folder)
                c.execute("INSERT INTO domande VALUES (?,?,?,?,?,?,?,?)", insert)
        elif folder == "comprensione":
            # Two row layouts:
            #   codice|testo(span 5)|empty      -> the reading text itself
            #   codice|question|a|b|c|d|answer  -> a question about the text
            for row in table.rows:
                print("add " + row.cells[0].text)
                if re.search('[A-Z]{2}[0-9]{3}0{2}', row.cells[0].text):
                    # Code ending in "00" marks the reading text
                    insert = (row.cells[0].text, row.cells[1].text, None, None,
                              None, None, None, "comprensione_a")
                else:
                    insert = (row.cells[0].text, row.cells[1].text, row.cells[2].text,
                              row.cells[3].text, row.cells[4].text, row.cells[5].text,
                              row.cells[6].text, "comprensione_b")
                c.execute("INSERT INTO domande VALUES (?,?,?,?,?,?,?,?)", insert)
    # NOTE(review): re-saving a file we only read looks unnecessary;
    # kept for compatibility with the original behavior.
    document.save(path)
def parse_docx(in_file, out_file):
    """Print the text of every paragraph in a .docx and save a copy.

    in_file  -- path of the source document
    out_file -- path the (unmodified) document is written to

    Python 2 only (print statement).
    """
    doc = Document(in_file)
    for item in doc.paragraphs:
        # Walk the content paragraph by paragraph (original comment: 通过段落解析内容)
        print item.text
    doc.save(out_file)
def insetImgToDocx(image):
    """Append *image* (5.5 inches wide) plus a caption line to the document
    at the module-global ``docx_file`` and save it back in place."""
    document = Document(docx_file)
    run = document.add_paragraph().add_run()
    # Width follows the guide line in the template
    # (original note: "inces sesuai yang ada di garis")
    run.add_picture(image, width=Inches(5.5))
    run.add_text('Ini picture: {}'.format(image))
    document.save(docx_file)
def create_docx(path, key, filename='pics', width=None, fontsize=9):
    """Collect every file under *path* whose name contains *key* and write
    them into a single DOCX, one picture per line, with a caption list.

    path     -- directory to scan (expected to end with '/')
    key      -- substring selecting the files to include
    filename -- output name without extension (default 'pics')
    width    -- picture width in inches; evenly split over 5.5" when None
    fontsize -- caption font size in points
    """
    from docx import Document
    from docx.shared import Inches
    from docx.shared import Pt
    # Fixed: `i.find(key) is not -1` compared object identity with an int
    # and only worked by CPython's small-int caching accident.
    file_list = [name for name in listdir(path) if key in name]
    for name in file_list:
        print(' %s is found' % name)
    doc = Document()
    para = doc.add_paragraph()
    run = para.add_run()
    font = run.font
    font.name = 'Calibri'
    font.size = Pt(fontsize)
    if width is None:
        # NOTE(review): raises ZeroDivisionError when no file matches *key*
        width = 5.5 / len(file_list)
    # Last three path segments, hoisted out of the loop (loop-invariant)
    short_path = path.split('/')[-3] + '/' + path.split('/')[-2] + '/' + path.split('/')[-1]
    for pic in file_list:
        run.add_text(short_path + pic + ': \n')
    para = doc.add_paragraph()
    run = para.add_run()
    for pic in file_list:
        run.add_text(' \n')
        run.add_picture(path + pic, width=Inches(width))
    # Fixed: `save_prjt` is not a python-docx Document method; the intent
    # (and the log line below) make clear this should be Document.save.
    doc.save('%s.docx' % filename)
    print(' save %s.docs' % filename)
def certificate(request, training_id, ppant_id):
    """Generate a participation certificate PDF for one participant/training.

    Fills the placeholders in media/template.docx, converts the result to a
    PDF with LibreOffice, records a Message, and redirects to the static file.
    """
    t = Training.objects.get(pk=training_id)
    p = Participant.objects.get(pk=ppant_id)
    document = Document('media/template.docx')
    # Fixed: compute the serial up front. The original only assigned it inside
    # the '<<serial>>' branch, so a template without that placeholder raised
    # NameError at document.save() below. str() keeps this safe for int ids.
    code = 'participant' + str(ppant_id) + 'training' + str(training_id)
    serial = encrypt(code)
    for paragraph in document.paragraphs:
        for run in paragraph.runs:
            if '<<name>>' in run.text:
                if p.mi:
                    run.text = p.sname + ', ' + p.fname + ' ' + p.mi
                else:
                    run.text = p.sname + ', ' + p.fname
            if '<<module>>' in run.text:
                run.text = '"' + t.module.fullname + '"'
            if '<<date>>' in run.text:
                # Dates are stored as YYYYMMDD; a training spans five days
                startdate = datetime.datetime.strptime(str(t.date), '%Y%m%d')
                enddate = startdate + datetime.timedelta(days=4)
                run.text = ('held from ' + startdate.strftime('%d %B') + ' to '
                            + enddate.strftime('%d %B %Y') + ' at the ')
            if '<<location>>' in run.text:
                l = Institution.objects.get(abbrev=t.location)
                run.text = l.fullname + ', ' + l.city + ', ' + l.province
            if '<<honors>>' in run.text:
                run.text = ''
            if '<<serial>>' in run.text:
                run.text = 'Serial: ' + serial
            if '<<verification>>' in run.text:
                run.text = ('The authenticity of this certificate can be verified at\n'
                            'http://db.portal.piic.org.ph/verif/' + serial + '/')
    document.save('media/' + serial + '.docx')
    # NOTE(review): shell commands interpolate `serial`; this is safe only if
    # encrypt() output never contains shell metacharacters -- confirm, or
    # switch to subprocess.run([...]) with argument lists.
    os.system('libreoffice --headless --convert-to pdf media/' + serial + '.docx')
    os.system('mv ' + serial + '.pdf static/cert/' + serial + '.pdf')
    os.system('rm media/' + serial + '.docx')
    Message.objects.create(participant=p, category=serial, medium='cert')
    url = static('cert/' + serial + '.pdf')
    return HttpResponseRedirect(url)
def modifyDocx(filePath, values):
    """Fill the expense-claim fields of the DOCX at *filePath* from *values*
    and save the result as test2.docx.

    values -- object exposing the amounts as attributes (total, travel, ...).

    The original repeated the findField/fillVal pair fourteen times; this
    drives the same calls from a table of (field name, run window, value).
    The paragraph search window is range(30, 40) for every field.
    """
    doc = Document(filePath)
    fields = [
        ("total_tf",  range(5, 10),  values.totalWVAT),
        ("total_vat", range(5, 20),  values.totalVAT),
        ("total",     range(10, 20), values.total),
        ("travel",    range(10, 20), values.travel),
        ("car",       range(10, 20), values.travelByCar),
        ("accom",     range(10, 20), values.accomodation),
        ("accom_vat", range(20, 30), values.accomodationVAT),
        ("food",      range(20, 30), values.subsistence),
        ("food_vat",  range(20, 30), values.subsistenceVAT),
        ("taxi",      range(20, 30), values.taxi),
        ("taxi_vat",  range(20, 30), values.taxiVAT),
        ("fuel",      range(20, 30), values.other),
        ("fuel_vat",  range(20, 30), values.otherVAT),
        ("total_b",   range(30, 40), values.total),
    ]
    for name, run_window, value in fields:
        a, p = findField(doc, name, range(30, 40), run_window)
        fillVal(doc, a, p, value)
    doc.save("test2.docx")
def _compile_doc(self, template_path, fields_data, doc_name):
    """Render a DOCX template by editing its raw XML.

    The document is saved to *doc_name*, unpacked (a .docx is a zip),
    word/document.xml is run through self._replace_tags() with *fields_data*,
    and the tree is zipped back into *doc_name*.
    (Original docstring, Russian: "Unzip the docx file, transform it and
    zip it back.")
    """
    doc = Document(template_path)
    doc.save(doc_name)
    docx_path = doc_name
    # Fixed: str.replace('docx', 'zip') also rewrote any 'docx' occurring in
    # directory names; splitext changes only the extension.
    base_path, _ = os.path.splitext(docx_path)
    zip_path = base_path + '.zip'
    folder_path = base_path
    internal_path_xml = '/word/document.xml'
    internal_path_txt = '/word/document.txt'
    # docx -> zip, unpack, drop the intermediate archive
    os.rename(docx_path, zip_path)
    zipfile.ZipFile(zip_path).extractall(path=folder_path)
    os.remove(zip_path)
    # Rename xml -> txt so _replace_tags can treat it as plain text
    os.rename(folder_path + internal_path_xml, folder_path + internal_path_txt)
    self._replace_tags(folder_path, internal_path_txt, fields_data)
    os.rename(folder_path + internal_path_txt, folder_path + internal_path_xml)
    # Re-zip the tree, remove it, and rename the archive back to .docx
    shutil.make_archive(folder_path, 'zip', folder_path)
    shutil.rmtree(folder_path)
    os.rename(zip_path, docx_path)
class HouseKeeping: def __init__(self, feedback_doc_name, marking_sheet_name, marker_name): self.feedback_doc_name = feedback_doc_name self.marker_name = marker_name #load student feedback form as a template self.feeback_document = Document(feedback_doc_name) #load my marking sheet 'PT' from workbook self.marking_sheet = xlrd.open_workbook(marking_sheet_name).sheet_by_name(marker_name) #username to firstname lastname map/dictionary self.name_map = {} self.construct_name_map() self.create_new_feedback_document() #probably won't work for Windows def unzip_submission(self, student_dir): #form unzip command cmd = 'unzip -d ' + student_dir + '/ ' + student_dir + '/*.zip' print cmd, '\n\n' sys_process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sys_process.wait() std_out = sys_process.stdout.read().strip() print std_out def create_new_feedback_document(self): marker_directory = os.path.dirname(os.path.realpath(__file__))+'/'+self.marker_name for student_dir, _, file in os.walk(marker_directory): student_dir_name = os.path.relpath(student_dir, marker_directory) #print student_dir if student_dir_name is not '.': student_name = self.name_map[student_dir_name][0] + ' ' + \ self.name_map[student_dir_name][1] self.write_student_name_to_document(student_dir, student_dir_name, student_name) #just do something extra self.unzip_submission(student_dir) def write_student_name_to_document(self, student_dir, student_dir_name, student_name): #default cell for student's firstname lastname filename = self.feedback_doc_name.replace('username', student_dir_name) self.feeback_document.tables[0].cell(1,0).text = student_name self.feeback_document.save(student_dir+'/'+filename) #print student_dir+'/'+filename def construct_name_map(self): username_index = 0 is_constructing_name_map = False for i in range(self.marking_sheet.nrows): if is_constructing_name_map: username = self.marking_sheet.row_values(i)[username_index] firstname = 
self.marking_sheet.row_values(i)[username_index-1] lastname = self.marking_sheet.row_values(i)[username_index-2] self.name_map[username]=[firstname, lastname] elif self.marking_sheet.row_values(i).count('Username') is 1: username_index = self.marking_sheet.row_values(i).index('Username') is_constructing_name_map = True
def create(title):
    """Create a document headed *title*, save it as '<title>.docx', remember
    the title in the module-global DOC_NAME, and return the document."""
    global DOC_NAME
    DOC_NAME = title
    doc = Document()
    doc.add_heading(title, 0)
    doc.save('%s.docx' % title)
    return doc
def createPatentTable(patentList, docFileName):
    """Write *patentList* as a 5-column landscape table into *docFileName*."""
    doc = Document()
    # Flip every section to landscape before adding content
    for sec in doc.sections:
        sec.orientation = WD_ORIENT.LANDSCAPE
    patent_table = doc.add_table(rows=1, cols=5)
    fillInPatentHeader(patent_table)
    fillInPatentListData(patent_table, patentList)
    doc.save(docFileName)
def get_project_as_word(project):
    """Return the project's details as a DOCX HTTP attachment.

    @Author : Arun Gopi
    @date   : 10/4/2016

    Removed from the original: a clutch of dead locals (errFound,
    mailItenary, a `dict` shadowing the builtin, unused column widths,
    context_dict) and redundant str() around strftime() results.
    """
    document = Document()
    # Page margins
    section = document.sections[0]
    section.top_margin = Inches(0.1)
    section.left_margin = Inches(1)
    section.right_margin = Inches(1)

    def _bullet(text):
        # One bulleted line in the fixed 8pt body size
        font = document.add_paragraph(style='ListBullet').add_run(text).font
        font.size = Pt(8)

    # Underlined 10pt title, then one bullet per project attribute
    title_font = document.add_paragraph().add_run(project.title).font
    title_font.size = Pt(10)
    title_font.underline = True
    _bullet(project.short_description)
    _bullet(project.detailed_description)
    _bullet('Start Date : ' + project.start_date.strftime('%d-%m-%Y'))
    _bullet('End Date : ' + project.end_date.strftime('%d-%m-%Y'))
    _bullet('Project Manager : ' + project.manager.first_name)
    _bullet('Client Name : ' + project.client_name)

    filename = "-".join('project-details.docx'.split())
    response = HttpResponse(
        content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
    response['Content-Disposition'] = 'attachment; filename=' + filename
    # python-docx can save directly into the file-like response object
    document.save(response)
    return response
def redact(redacters, doc):
    """Return a new Document whose paragraphs are lower-cased copies of
    *doc*'s, with every regex in *redacters* replaced by '[REDACTED]'."""
    out = Document()
    for paragraph in doc.paragraphs:
        text = paragraph.text.lower()
        for pattern in redacters:
            text = re.sub(pattern, '[REDACTED]', text)
        out.add_paragraph(text)
    return out
class DOCXReport(object):
    """Abstract base for Word reports rendered from a template.

    Subclasses supply the template filename and the content generation.
    """

    def __init__(self, root_path, context):
        self.root_path = root_path
        self.context = context

    def build_report(self):
        """Build the DOCX report and return it as a rewound StringIO."""
        template = os.path.join(self.root_path, self.get_template_fn())
        self.doc = Document(template)
        self.create_content()
        out = StringIO()
        self.doc.save(out)
        out.seek(0)
        return out

    @abc.abstractmethod
    def get_template_fn(self):
        """Return the Word-template filename; the template should contain
        all Word styles used in the report."""
        pass

    @abc.abstractmethod
    def create_content(self):
        """Generate the content of the Word report."""
        pass

    def setLandscape(self):
        """Switch the last section to US-letter landscape."""
        section = self.doc.sections[-1]
        section.orientation = WD_ORIENT.LANDSCAPE
        section.page_width = Inches(11)
        section.page_height = Inches(8.5)

    def setMargins(self, left=None, right=None, top=None, bottom=None):
        """Set any subset of the page margins (in inches) on the last section."""
        section = self.doc.sections[-1]
        for attr, value in (('left_margin', left), ('right_margin', right),
                            ('top_margin', top), ('bottom_margin', bottom)):
            if value:
                setattr(section, attr, Inches(value))
def deployaword(self, texto, nombre):
    """Write every item of *texto* as a bullet point into '<nombre>.docx'."""
    doc = Document()
    for entry in texto:
        doc.add_paragraph(str(entry), style='ListBullet')
    doc.save(nombre + '.docx')
def export_word(request):
    """
    Export data in DOCX format
    http://127.0.0.1:8000/api/export/word?quote_id=100
    info: http://python-docx.readthedocs.org/en/latest/user/styles-understanding.html#understanding-styles
    """
    # Guard clause: no valid object resolved from the request -> 404
    valid_obj = get_object(request)
    if not valid_obj:
        raise Http404

    response = HttpResponse(mimetype='text/docx')
    response['Content-Disposition'] = ('attachment; filename=' +
                                       valid_obj['filename'] + '.docx')
    wordDocument = Document()
    # Collect the quote(s) to export depending on what kind of object we got
    if 'quote' in valid_obj:
        wordDocument = writeSingleQuote(wordDocument, request, valid_obj['quote'])
    elif 'document' in valid_obj:
        for quote in valid_obj['document'].fragment_set.all():
            wordDocument = writeSingleQuote(wordDocument, request, quote)
    elif 'koncept' in valid_obj:
        for intf in valid_obj['koncept'].intfrag_set.all():
            wordDocument = writeSingleQuote(wordDocument, request, intf.fragment)
    else:
        raise Http404
    wordDocument.save(response)
    return response
def writeresult(self, vm_mtu):
    """Run the benchmark command list against this server/client pair and
    record the results: raw output into a .docx, extracted numbers into
    an .xlsx, plus a shared aggregate passed through a queue.

    vm_mtu -- MTU setting forwarded to the HostProcess workers.
    """
    ori_path, extract_path = self.create_file(vm_mtu)
    ori_docx = Document()
    extract_xlsx = Workbook()
    self.create_excel_sheet(extract_xlsx)
    column = 1  # next free column in the result sheet
    self.write_ori_head(ori_docx)
    # One worker process per side; the server additionally signals readiness
    # through s_check_q.
    s = HostProcess(CONF[self.pair], vm_mtu, self.output_q, 'server',
                    self.s_cmd_q, self.s_result, self.s_check_q)
    c = HostProcess(CONF[self.pair], vm_mtu, self.output_q, 'client',
                    self.c_cmd_q, self.c_result)
    s.start()
    c.start()
    run_cmd_times = 0
    for item in CONF.cmd_list:
        ori_docx.add_paragraph(item.upper(), style='Heading 4')
        s_cmd = self.create_cmd('server', item)
        c_cmd = self.create_cmd('client', item)
        for seq in range(CONF.test_times):
            # Start the server side and wait until it has consumed the command
            self.s_cmd_q.put(s_cmd)
            while not self.s_cmd_q.empty():
                time.sleep(0.2)
            self.s_check_q.get()  # blocks until the server says it is ready
            time.sleep(2)
            self.c_cmd_q.put(c_cmd)
            try:
                s_result = self.s_result.get(timeout=240)
                c_result = self.c_result.get(timeout=240)
            except:
                # NOTE(review): bare except -- on timeout the server is killed
                # and results awaited again; a second timeout propagates.
                self.kill_process(CONF[self.pair], 'server')
                s_result = self.s_result.get(timeout=240)
                c_result = self.c_result.get(timeout=240)
            self.output_q.put('wait all cmd run complete')
            self.write_ori_to_docx(ori_docx, seq, s_cmd, c_cmd, s_result, c_result)
            extracted_result = self.extract_result(c_result)
            # Sheet name means "data set #<seq+1>"
            ws = extract_xlsx.get_sheet_by_name(u'第{}组数据'.format(str(seq+1)))
            for index, result_cell in enumerate(extracted_result):
                self.output_q.put(extracted_result)
                self.output_q.put(result_cell)
                ws.cell(row=2, column=column+index, value=result_cell)
                # collect_all_result is a queue used as shared mutable state:
                # take the aggregate out, update it, put it back.
                all_result = self.collect_all_result.get()
                all_result[seq][self.num][column+index] = result_cell
                self.collect_all_result.put(all_result)
            run_cmd_times += 1
            self.output_q.put(run_cmd_times)
            # Barrier: wait until every pair has finished this command round
            self.all_cmd_q.put('cmd_run')
            while self.all_cmd_q.qsize() != run_cmd_times * self.pair_num:
                time.sleep(0.3)
            time.sleep(3)
        column += len(extracted_result)
    ori_docx.save(ori_path)
    extract_xlsx.save(extract_path)
    self.output_q.put('save_xlsx')
    # Tell both workers to exit and wait for them
    self.s_cmd_q.put('over')
    self.c_cmd_q.put('over')
    s.join()
    c.join()
    self.output_q.put('over')
def generate_program(recordset):
    """Write the event programme to 'programma.docx'.

    Each record becomes a numbered block: author (surname in caps), study
    year and programme level, the work's title, and the supervisor line.
    Python 2 only: .decode('utf-8') is applied to byte-string literals.
    """
    document = Document()
    for i, item in enumerate(recordset):
        # "<n>. <first name> " starts the entry
        p = document.add_paragraph(str(i + 1) + '. ' + item.author.first_name + ' ',
                                   style='NoSpacing')
        p.add_run(item.author.last_name + ', ').all_caps = True
        # "<year>. kursa <level>" -- Latvian for "year ... of programme level"
        p.add_run(item.author.study_yr + '. kursa ' +
                  (settings.STUDY_PROG_LVL_NULL_DICT[item.author.study_prog_lvl]).decode('utf-8')).add_break()
        p.add_run("\"" + item.title + "\"").add_break()
        # "Zinātniskais vadītājs" = "scientific supervisor"
        p.add_run(('Zinātniskais vadītājs: ').decode(
            'utf-8') + item.supervisor.title + ', ' + item.supervisor.degree + ' ' +
            item.supervisor.first_name + ' ' + item.supervisor.last_name).add_break()
    document.save('programma.docx')
def render(self, ctx, req):
    """Render *ctx* into a new DOCX document and return its raw bytes."""
    document = Document()
    self.write(ctx, req, document)
    #
    # TODO: add tsammalex license information!
    #
    buffer = BytesIO()
    document.save(buffer)
    buffer.seek(0)
    return buffer.read()
def generate_docx(path, outfile, lvl=1):
    """ Creates document and parses directory recursivly to generate
    docx file """
    doc = Document()
    # Install the code style before any content is added
    doc = create_code_style(doc)
    add_dir_to_dox(path, doc, lvl)
    doc.save(outfile)
def WriteTiezi(self):
    """Dump the scraped thread pages into '<dirname>/<name>.docx', where
    <name> is built from the last two URL path segments.

    Each text block becomes a paragraph, followed by its images (if any)
    and a separator line. Python 2 only (print statement, dict.has_key).
    """
    if len(self.pages) == 0:
        # NOTE(review): only warns -- execution continues and still writes
        # an (empty) document below.
        print 'Error!'
    document = Document()
    for i in range(self.page_num):
        now_page = self.pages[i]
        for a in now_page:
            for b in a:
                document.add_paragraph(b.decode())
            if len(a) > 1:
                # NOTE(review): membership is tested with a[0] but the lookup
                # uses a[0].decode() -- looks inconsistent; confirm the key
                # type of self.img.
                if self.img.has_key(a[0]):
                    for k in self.img[a[0].decode()]:
                        pic_name = self.getImg(k)
                        document.add_picture(self.dirname + '/' + pic_name)
            document.add_paragraph('---------------------------------')
    name = self.url.strip().split('/')
    name = name[-2] + name[-1]
    document.save(self.dirname + '/' + name + '.docx')
    print "Success to dump into " + name + '.docx'
def get_text(filename):
    """Return the document's text as a list of lower-cased words.

    Words are produced by joining all paragraphs with single spaces and
    splitting on single spaces (empty strings are therefore possible for
    runs of whitespace, matching the original behavior).

    Removed from the original: an immediate `doc.save('temp.docx')` that
    wrote an unmodified copy to disk for no purpose.
    """
    doc = Document(filename)
    words = ' '.join(p.text for p in doc.paragraphs).split(' ')
    return [w.lower() for w in words]
def _save_results(filename, abstracts):
    """Write (title, content) pairs into ./result/<filename>: one heading
    plus its paragraphs per abstract, each abstract on its own page."""
    doc = Document()
    for title, body in abstracts:
        doc.add_heading(title, level=0)
        # Content uses Windows line endings as the paragraph separator
        for paragraph_text in body.split("\r\n"):
            doc.add_paragraph(paragraph_text)
        doc.add_page_break()
    doc.save("./result/" + filename)
def wrap_func(paras):
    """Run the wrapped `func` once for each file listed in paras["files"].

    paras["files"] is a "_@@_"-separated string whose first element is
    discarded. Each document is opened, exposed to `func` via paras['doc'],
    then saved back in place. Python 2 only (print statements).
    """
    try:
        files = paras["files"].split("_@@_")[1:]
        for file in files:
            doc = Document(file)
            paras['doc'] = doc
            # `func` comes from the enclosing (decorator) scope
            func(paras)
            doc.save(file)
            print "successful"
    except:
        # NOTE(review): bare except swallows everything (incl. SystemExit);
        # failures are only reported via the printed traceback.
        print traceback.format_exc()
def given_a_run_inside_a_table_cell_from_source(context, cell_source):
    """Behave step: add a run to a table cell reached via *cell_source*.

    The three access paths should all yield an equivalent _Cell object.
    """
    document = Document()
    table = document.add_table(rows=2, cols=2)
    if cell_source == 'Table.cell':
        target_cell = table.cell(0, 0)
    elif cell_source == 'Table.row.cells':
        target_cell = table.rows[0].cells[1]
    elif cell_source == 'Table.column.cells':
        target_cell = table.columns[1].cells[0]
    context.document = document
    context.run = target_cell.paragraphs[0].add_run()
def toWordFile(self, filename):
    """Format the schedule as a docx to send to the printer.

    "##" fields (##venue##, ##adjudicator##) are placeholders the user
    fills in after the fact.
    """
    document = Document()
    header = document.add_paragraph()
    # Discipline taken from the first entry of the first event
    discipline = self.sessions[0].eventList[0].entries[0].discipline
    header.add_run(discipline.upper() + " - ##venue##\n").bold = True
    header.add_run("Adjudicator - ##adjudicator##\n").bold = True
    for session in self.sessions:
        session.toWordFile(document)
    document.save(filename)
def doit():
    """Dump the schema of every table in the `renrenbx` database to a DOCX
    named '<database>-<YYYYMMDDHH>.docx'.

    For each table: a heading, the engine, the table comment, and a
    4-column table of its columns (name / key / type / comment).
    """
    # NOTE(review): connection details are hard-coded; move to configuration.
    host = '192.168.1.254'
    user = '******'
    password = '******'
    database = 'renrenbx'
    mysql_init(mysql_host=host, mysql_database=database,
               mysql_user=user, mysql_password=password)
    # Group the information_schema rows by table name
    tables = {}
    for column in table_schema(database):
        tables[column['TABLE_NAME']] = {'info': column, 'columns': []}
    for column in table_colume(database):
        tables[column['TABLE_NAME']]['columns'] += [column]
    document = Document()
    document.add_heading(database, 0)
    i = 0
    max = len(tables)  # NOTE(review): shadows the builtin max()
    for key in sorted(tables.keys()):
        i = i + 1
        # Crude in-place console progress bar
        value = int(round((i * 1.0) / max * 100))
        sys.stdout.write(' [' + '#' * i + '] %s%%' % value + '\r')
        sys.stdout.flush()
        document.add_heading(key, 1)
        table_engine = tables[key]['info']['ENGINE']
        paragraph = document.add_paragraph()
        paragraph.add_run(table_engine).bold = True
        table_comment = tables[key]['info']['TABLE_COMMENT']
        paragraph = document.add_paragraph()
        # u'无注释' = "no comment"
        paragraph.add_run(table_comment if table_comment else u'无注释').bold = True
        # Header row: field / primary key / type / comment
        table = document.add_table(rows=1, cols=4)
        hdr_cells = table.rows[0].cells
        hdr_cells[0].text = u'字段'
        hdr_cells[1].text = u'主键'
        hdr_cells[2].text = u'类型'
        hdr_cells[3].text = u'注释'
        for column in tables[key]['columns']:
            row_cell = table.add_row().cells
            row_cell[0].text = column['COLUMN_NAME']
            row_cell[1].text = column['COLUMN_KEY'] if column['COLUMN_KEY'] else '-'
            row_cell[2].text = column['COLUMN_TYPE']
            row_cell[3].text = column['COLUMN_COMMENT'] if column['COLUMN_COMMENT'] else '-'
    document.save('%s-%s.docx' % (database, datetime.datetime.now().strftime("%Y%m%d%H")))
def setUp(self):
    """Create test_basic_docs.docx: one paragraph mixing plain, bold,
    italic, underlined, and bold+italic+underlined runs."""
    document = Document()
    paragraph = document.add_paragraph("Test document with")
    paragraph.add_run('bold').bold = True
    paragraph.add_run("and some")
    paragraph.add_run("italic").italic = True
    paragraph.add_run("and some")
    paragraph.add_run("underline").underline = True
    combo = paragraph.add_run("and all three")
    combo.bold = combo.italic = combo.underline = True
    document.save('test_basic_docs.docx')
def create_doc(heading_pre, date_as_string):
    """
    Creates a document with an empty table and given heading
    :param heading_pre: string, the prefix of the heading (e.g. 'Conflicts ')
    :param date_as_string: string, the date as a string
    :return: the new Document
    """
    doc = Document()
    doc.add_heading(heading_pre + date_as_string, 0)
    # One header row, four columns; filled in by create_headings()
    header_cells = doc.add_table(rows=1, cols=4).rows[0].cells
    create_headings(header_cells)
    return doc
def read_DOCX(filename):
    """Return the concatenated text of every paragraph in *filename*.

    Paragraph texts are concatenated with no separator, exactly as the
    original loop did.
    """
    document = Document(filename)
    # join() avoids the quadratic cost of repeated string concatenation
    return "".join(para.text for para in document.paragraphs)
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.style import WD_STYLE_TYPE
from docx.shared import Pt
from docx.enum.table import WD_TABLE_ALIGNMENT
import numpy as np
from docx.oxml.ns import qn
import datetime
from detectconf import *
import os
import pandas as pd

# Load the detection data (original comment: 导入数据, "import the data").
# FILENAME, MODELPATH, COMPANY and STAFF come from detectconf (star import).
df = pd.read_excel(FILENAME, index_col=None)
now = datetime.datetime.now()
# Open the report template and fill in the cover fields.
# NOTE(review): the paragraph indices (1, 7, 8, 13, -1) assume the fixed
# layout of the MODELPATH template -- confirm if the template changes.
docx = Document(MODELPATH)
company = docx.paragraphs[1].add_run(text=COMPANY)
company.font.size = Pt(36)
company.font.bold = True
sta = docx.paragraphs[7].add_run(text=STAFF)
sta.font.size = Pt(22)
sta.font.name = u'微软雅黑'
wri = docx.paragraphs[8].add_run(text=STAFF)
wri.font.size = Pt(22)
wri.font.name = u'微软雅黑'
# Date line, e.g. "2020年 05月" (year / month)
dat = docx.paragraphs[13].add_run(text=now.strftime('%Y') + '年 ' + now.strftime('%m') + '月')
dat.font.size = Pt(18)
dat.font.name = 'Microsoft YaHei UI'
# Clear the trailing placeholder paragraph
last = docx.paragraphs[-1].clear()
# Build dirlist: for each name in `filename`, collect matching files under
# dirname. NOTE(review): layout reconstructed -- `fileorder` appears to
# advance once per os.walk pass; confirm the intended nesting.
fileorder = 0
while fileorder < len(filename):
    for root, dirs, files in os.walk(dirname):
        for file in files:
            if filename[fileorder] in file:
                dirlist.append(os.path.join(root, file))
    fileorder = fileorder + 1
print(dirlist)
#os.remove('sheet.docx')
document = Document()
# Two-column page layout, set directly on the section's sectPr XML element
# (python-docx has no high-level column API).
section = document.sections[0]
sectPr = section._sectPr
cols = sectPr.xpath('./w:cols')[0]
cols.set(qn('w:num'), '2')
# Zero top/bottom margins on every section
sections = document.sections
for section in sections:
    section.top_margin = Cm(0)
    section.bottom_margin = Cm(0)
# Connect to the question database that the parser below fills in
conn = sqlite3.connect('../server/db/questions.db')
c = conn.cursor()


class Question:
    """Container for one quiz question parsed out of the source document."""

    def __init__(self):
        self.question = ""  # question text
        self.power = ""     # power/bonus marker
        self.answers = []   # accepted answers
        self.prompts = []   # prompt lines

    def __repr__(self):
        return ("Power: " + self.power + " Question: " + self.question + "\n" +
                "Answer: " + str(self.answers) + " prompts: " + str(self.prompts) + "\n")


document = Document('source3.docx')
# (debugging walk over paragraphs/runs, kept for reference)
# for paragraph in document.paragraphs:
#     if "page" in paragraph.text.lower() or "scop" in paragraph.text.lower() or "round" in paragraph.text.lower():
#         continue
#     # print (paragraph.text)
#     for run in paragraph.runs:
#         print (run.bold)
#         print(run.text)
#         print("+++++")
#     print ("--------------------------------------------")
# Parser state flags
started = False
in_answer = False
# while True: # writable = [] # text = (all_text[randint(1,len(all_text))]).split('ред')#)#[0:5] # for i in text: # writable.append(i+'ред') # textToWrite = ''.join(writable) # textToWrite += '\"' # if 33 <len(textToWrite)< 665: # break with open('textfiles/text/' + str(para) + '.txt', 'w') as f: f.write(textToWrite) doc = Document() style = doc.styles['Normal'] font = style.font font.name = 'Lohit Bengali' font.size = Pt(14) section = doc.sections[0] section.page_height = Mm(297) section.page_width = Mm(210) section.bottom_margin = Mm(12.7) section.top_margin = Mm(15) section.left_margin = Mm(12.7) section.right_margin = Mm(12.7) table1 = doc.add_table(rows=4, cols=2) table1.style = "TableGrid"
def re_extract(filename):
    """Extract contract fields from a debt-transfer agreement (.docx).

    Returns {'element_index': [field names], 'elements': [values]} with ''
    for every field that is not found. The original repeated the same scan
    loop ~29 times; this version drives it from a table of rules.

    Fixes over the original:
    * when a label line matched but the follow-up line did NOT match the
      detail pattern, the loop broke without appending anything, silently
      misaligning every later field -- '' is now appended instead;
    * the i+1 / i+2 look-ahead could raise IndexError near the end of the
      document -- now bounds-checked.
    """
    doc = Document(filename)
    paragraphs = [p.text for p in doc.paragraphs]

    # Generic "label:value" pattern used for the look-ahead detail lines
    KV = r'(.*)[::](.*)'
    # "amounts" line: principal / interest / other / overall claims in words
    AMOUNTS = r'.*小写[::](.*)元.*小写[::](.*)元.*其他债权.*小写[::](.*)元.*整体债权.*小写[::](.*)元'
    # "totals" line: total principal+interest / principal balance / interest
    TOTALS = r'.*本息总额.*小写[::](.*)元.*,.*小写[::](.*)元.*,.*小写[::](.*)元.*'

    def first_group(pattern, group=1):
        # First paragraph where `pattern` matches anywhere (re.search); ''
        # when no paragraph matches.
        for text in paragraphs:
            m = re.search(pattern, text)
            if m:
                return m.group(group)
        return ''

    def anchored_group(pattern, group=1):
        # Same, but the pattern must match from the start (re.match).
        for text in paragraphs:
            m = re.match(pattern, text)
            if m:
                return m.group(group)
        return ''

    def lookahead_group(pattern, offset, follow=KV, group=2, anchored=True):
        # Value taken from the `offset`-th paragraph after the first one
        # matching `pattern`; '' when the label or the follow-up is absent.
        matcher = re.match if anchored else re.search
        for i, text in enumerate(paragraphs):
            if matcher(pattern, text):
                j = i + offset
                if j < len(paragraphs):
                    m = re.match(follow, paragraphs[j])
                    if m:
                        return m.group(group)
                return ''
        return ''

    # One (field name, extractor) rule per output element, in output order.
    fields = [
        ('合同编号', lambda: first_group(r'.*(?:合同编号|编号)[::](.*)号')),
        ('适用于', lambda: anchored_group(r'(.*)协议')),
        ('协议签订日期', lambda: anchored_group(r'.*协议签订日期[::](.*)')),
        ('协议签订地点', lambda: anchored_group(r'.*协议签订地点[::](.*)')),
        ('转让方', lambda: anchored_group(r'.*转 *让 *方.*[::](.*)')),
        ('受让方', lambda: anchored_group(r'.*受 *让 *方.*[::](.*)')),
        ('债务方', lambda: anchored_group(r'.*(?:债务方|债务人)[::](.*)')),
        # Person in charge: the "label:value" line right after the party line
        ('转让方负责人', lambda: lookahead_group(r'.*转 *让 *方.*[::](.*)', 1)),
        ('受让方负责人', lambda: lookahead_group(r'.*受 *让 *方.*[::](.*)', 1)),
        ('债务方负责人', lambda: lookahead_group(r'.*(?:债务方|债务人)[::](.*)', 1)),
        # Address: two lines after the party line
        ('转让方住所', lambda: lookahead_group(r'.*转 *让 *方.*[::](.*)', 2)),
        ('受让方住所', lambda: lookahead_group(r'.*受 *让 *方.*[::](.*)', 2)),
        ('债务方住所', lambda: lookahead_group(r'.*(?:债务方|债务人)[::](.*)', 2)),
        ('账面本金余额', lambda: anchored_group(AMOUNTS, 1)),
        ('利息', lambda: first_group(AMOUNTS, 2)),
        ('其他债权', lambda: anchored_group(r'.*其他债权.*小写[::](.*)元.*,.')),
        ('整体债权', lambda: anchored_group(r'.*整体债权.*小写[::](.*)元.*.')),
        ('转让价款', lambda: first_group(r'.*转让价款为(.*)')),
        ('债权金额', lambda: first_group(r'.*债权金额为(.*)')),
        ('违约金', lambda: first_group(r'(.*)违约金.*小写[::](.*)元.*.', 2)),
        ('基准日_非标', lambda: first_group(r'.*“基准日”系(.*)')),
        ('基准日_标准', lambda: first_group(
            r'.*基准日.*指(?:甲方|转让方)确定的计算标的债权账面本金及利息余额的截止日,(.*)')),
        ('本息总额', lambda: first_group(TOTALS, 1)),
        ('本金余额', lambda: first_group(TOTALS, 2)),
        ('欠息', lambda: first_group(
            r'.*本息总额.*小写[::](.*)元.*,.*本金余额.*小写[::](.*)元.*小写[::](.*)元.*', 3)),
        ('转让方开户银行', lambda: anchored_group(r'.*(?:开户银行|开户行)[::](.*)')),
        ('转让方户名', lambda: anchored_group(r'.*户 *名[::](.*)')),
        ('转让方账号', lambda: lookahead_group(r'.*户 *名[::](.*)', 1)),
        # Deposit details: the "x,y" line after a paragraph mentioning 交易保证金
        ('交易保证金详情', lambda: lookahead_group(
            '交易保证金', 1, follow=r'(.*),(.*)', anchored=False)),
    ]

    elements_index = [name for name, _ in fields]
    elements = [extract() for _, extract in fields]
    dic = {'element_index': elements_index, 'elements': elements}
    print(dic)
    elements_d = DataFrame(dic)
    print(elements_d)
    return dic
class DocxTemplate(object):
    """Manage a .docx file as if it were a Jinja2 template.

    The document body (and headers/footers) are converted to XML strings,
    patched so that Jinja2 tags survive MS Word's run/paragraph splitting,
    rendered through Jinja2, then written back into the python-docx document.
    Also supports replacing media, embedded objects and pictures.

    NOTE: written for py2/py3 compatibility (uses ``six``); no type
    annotations added on purpose.
    """
    # Relationship URIs used to locate header/footer parts in the package.
    HEADER_URI = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
    FOOTER_URI = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"

    def __init__(self, docx):
        # `docx` may be a path or file-like object accepted by python-docx.
        self.docx = Document(docx)
        # Replacement registries, filled by the replace_* methods and applied
        # in pre_processing()/post_processing() around save().
        self.crc_to_new_media = {}
        self.crc_to_new_embedded = {}
        self.zipname_to_replace = {}
        self.pic_to_replace = {}
        self.pic_map = {}

    def __getattr__(self, name):
        # Delegate unknown attributes to the wrapped python-docx Document.
        return getattr(self.docx, name)

    def xml_to_string(self, xml, encoding='unicode'):
        # Be careful : pretty_print MUST be set to False, otherwise patch_xml()
        # won't work properly
        # NOTE(review): the `encoding` parameter is ignored — 'unicode' is
        # hard-coded in the call below.
        return etree.tostring(xml, encoding='unicode', pretty_print=False)

    def get_docx(self):
        """Return the underlying python-docx Document."""
        return self.docx

    def get_xml(self):
        """Return the document body as an XML string."""
        return self.xml_to_string(self.docx._element.body)

    def write_xml(self, filename):
        """Dump the body XML to a file (debugging helper)."""
        with open(filename, 'w') as fh:
            fh.write(self.get_xml())

    def patch_xml(self, src_xml):
        """Rewrite raw document XML so Jinja2 tags become renderable.

        MS Word freely splits text into runs, inserting XML tags in the
        middle of ``{{ }}`` / ``{% %}`` constructs; this method strips those
        and implements the special docxtpl tags (colspan, cellbg, r, vm, hm,
        tr/tc/p/r-scoped tags, whitespace-trim markers).
        """
        # strip all xml tags inside {% %} and {{ }} that MS word can insert
        # into xml source also unescape html entities
        src_xml = re.sub(
            r'(?<={)(<[^>]*>)+(?=[\{%])|(?<=[%\}])(<[^>]*>)+(?=\})',
            '', src_xml, flags=re.DOTALL)

        def striptags(m):
            # Remove run/text tags found *inside* a Jinja2 tag.
            return re.sub('</w:t>.*?(<w:t>|<w:t [^>]*>)', '',
                          m.group(0), flags=re.DOTALL)
        src_xml = re.sub(r'{%(?:(?!%}).)*|{{(?:(?!}}).)*', striptags,
                         src_xml, flags=re.DOTALL)

        # manage table cell colspan
        def colspan(m):
            cell_xml = m.group(1) + m.group(3)
            # Drop empty runs left behind by the removed tag.
            cell_xml = re.sub(
                r'<w:r[ >](?:(?!<w:r[ >]).)*<w:t></w:t>.*?</w:r>',
                '', cell_xml, flags=re.DOTALL)
            cell_xml = re.sub(r'<w:gridSpan[^/]*/>', '', cell_xml, count=1)
            return re.sub(r'(<w:tcPr[^>]*>)',
                          r'\1<w:gridSpan w:val="{{%s}}"/>' % m.group(2),
                          cell_xml)
        src_xml = re.sub(
            r'(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*colspan\s+([^%]*)\s*%}(.*?</w:tc>)',
            colspan, src_xml, flags=re.DOTALL)

        # manage table cell background color
        def cellbg(m):
            cell_xml = m.group(1) + m.group(3)
            cell_xml = re.sub(
                r'<w:r[ >](?:(?!<w:r[ >]).)*<w:t></w:t>.*?</w:r>',
                '', cell_xml, flags=re.DOTALL)
            cell_xml = re.sub(r'<w:shd[^/]*/>', '', cell_xml, count=1)
            return re.sub(
                r'(<w:tcPr[^>]*>)',
                r'\1<w:shd w:val="clear" w:color="auto" w:fill="{{%s}}"/>' % m.group(2),
                cell_xml)
        src_xml = re.sub(
            r'(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*cellbg\s+([^%]*)\s*%}(.*?</w:tc>)',
            cellbg, src_xml, flags=re.DOTALL)

        # avoid {{r and {%r tags to strip MS xml tags too far
        # ensure space preservation when splitting
        src_xml = re.sub(r'<w:t>((?:(?!<w:t>).)*)({{r\s.*?}}|{%r\s.*?%})',
                         r'<w:t xml:space="preserve">\1\2',
                         src_xml, flags=re.DOTALL)
        src_xml = re.sub(
            r'({{r\s.*?}}|{%r\s.*?%})',
            r'</w:t></w:r><w:r><w:t xml:space="preserve">\1</w:t></w:r><w:r><w:t xml:space="preserve">',
            src_xml, flags=re.DOTALL)

        # {%- will merge with previous paragraph text
        src_xml = re.sub(r'</w:t>(?:(?!</w:t>).)*?{%-', '{%',
                         src_xml, flags=re.DOTALL)
        # -%} will merge with next paragraph text
        src_xml = re.sub(r'-%}(?:(?!<w:t[ >]).)*?<w:t[^>]*?>', '%}',
                         src_xml, flags=re.DOTALL)

        for y in ['tr', 'tc', 'p', 'r']:
            # replace into xml code the row/paragraph/run containing
            # {%y xxx %} or {{y xxx}} template tag
            # by {% xxx %} or {{ xx }} without any surronding <w:y> tags :
            # This is mandatory to have jinja2 generating correct xml code
            pat = r'<w:%(y)s[ >](?:(?!<w:%(y)s[ >]).)*({%%|{{)%(y)s ([^}%%]*(?:%%}|}})).*?</w:%(y)s>' % {
                'y': y
            }
            src_xml = re.sub(pat, r'\1 \2', src_xml, flags=re.DOTALL)

        # add vMerge
        # use {% vm %} to make this table cell and its copies be vertically merged within a {% for %}
        def v_merge_tc(m):
            def v_merge(m1):
                return (
                    '<w:vMerge w:val="{% if loop.first %}restart{% else %}continue{% endif %}"/>' +
                    m1.group(1) +  # Everything between ``</w:tcPr>`` and ``<w:t>``.
                    "{% if loop.first %}" +
                    m1.group(2) +  # Everything before ``{% vm %}``.
                    m1.group(3) +  # Everything after ``{% vm %}``.
                    "{% endif %}" +
                    m1.group(4)  # ``</w:t>``.
                )
            return re.sub(
                r'(</w:tcPr[ >].*?<w:t(?:.*?)>)(.*?)(?:{%\s*vm\s*%})(.*?)(</w:t>)',
                v_merge,
                m.group(),  # Everything between ``</w:tc>`` and ``</w:tc>`` with ``{% vm %}`` inside.
                flags=re.DOTALL,
            )
        src_xml = re.sub(
            r'<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*vm\s*%}.*?</w:tc[ >]',
            v_merge_tc, src_xml, flags=re.DOTALL)

        # Use ``{% hm %}`` to make table cell become horizontally merged within
        # a ``{% for %}``.
        def h_merge_tc(m):
            xml_to_patch = m.group()  # Everything between ``</w:tc>`` and ``</w:tc>`` with ``{% hm %}`` inside.

            def with_gridspan(m1):
                return (m1.group(1) +  # ``w:gridSpan w:val="``.
                        '{{ ' + m1.group(2) + ' * loop.length }}' +  # Content of ``w:val``, multiplied by loop length.
                        m1.group(3))  # Closing quotation mark.

            def without_gridspan(m2):
                return ('<w:gridSpan w:val="{{ loop.length }}"/>' +
                        m2.group(1) +  # Everything between ``</w:tcPr>`` and ``<w:t>``.
                        m2.group(2) +  # Everything before ``{% hm %}``.
                        m2.group(3) +  # Everything after ``{% hm %}``.
                        m2.group(4))  # ``</w:t>``.

            if re.search(r'w:gridSpan', xml_to_patch):
                # Simple case, there's already ``gridSpan``, multiply its value.
                xml = re.sub(
                    r'(w:gridSpan w:val=")(\d+)(")',
                    with_gridspan,
                    xml_to_patch,
                    flags=re.DOTALL,
                )
                xml = re.sub(
                    r'{%\s*hm\s*%}',
                    '',
                    xml,  # Patched xml.
                    flags=re.DOTALL,
                )
            else:
                # There're no ``gridSpan``, add one.
                xml = re.sub(
                    r'(</w:tcPr[ >].*?<w:t(?:.*?)>)(.*?)(?:{%\s*hm\s*%})(.*?)(</w:t>)',
                    without_gridspan,
                    xml_to_patch,
                    flags=re.DOTALL,
                )
            # Discard every other cell generated in loop.
            return "{% if loop.first %}" + xml + "{% endif %}"
        src_xml = re.sub(
            r'<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*hm\s*%}.*?</w:tc[ >]',
            h_merge_tc, src_xml, flags=re.DOTALL)

        def clean_tags(m):
            # NOTE(review): several of these replace() calls are no-ops
            # (e.g. replace('<', '<')). Upstream docxtpl uses
            # .replace('&lt;', '<') / .replace('&gt;', '>') etc.; it looks
            # like HTML entities were unescaped when this copy was made —
            # TODO confirm against the original source.
            return (m.group(0).replace(r"‘", "'").replace(
                '<', '<').replace('>', '>').replace(u'“', u'"').replace(
                    u'”', u'"').replace(u"‘", u"'").replace(u"’", u"'"))
        src_xml = re.sub(r'(?<=\{[\{%])(.*?)(?=[\}%]})', clean_tags, src_xml)

        return src_xml

    def render_xml(self, src_xml, context, jinja_env=None):
        """Render patched XML through Jinja2 with `context`.

        On a TemplateError, attaches a few surrounding (tag-stripped) source
        lines to the exception as ``docx_context`` to ease debugging.
        """
        # Newlines are temporarily inserted before <w:p> so Jinja2 error line
        # numbers roughly map to paragraphs; removed again after rendering.
        src_xml = src_xml.replace(r'<w:p>', '\n<w:p>')
        try:
            if jinja_env:
                template = jinja_env.from_string(src_xml)
            else:
                template = Template(src_xml)
            dst_xml = template.render(context)
        except TemplateError as exc:
            if hasattr(exc, 'lineno') and exc.lineno is not None:
                line_number = max(exc.lineno - 4, 0)
                exc.docx_context = map(
                    lambda x: re.sub(r'<[^>]+>', '', x),
                    src_xml.splitlines()[line_number:(line_number + 7)])
            raise exc
        dst_xml = dst_xml.replace('\n<w:p>', '<w:p>')
        # Escaped tag forms ({_{ ... }_}) survive rendering and are restored
        # as literal Jinja2 delimiters here.
        dst_xml = (dst_xml.replace('{_{', '{{').replace('}_}', '}}').replace(
            '{_%', '{%').replace('%_}', '%}'))
        return dst_xml

    def build_xml(self, context, jinja_env=None):
        """Patch + render the document body; return the resulting XML string."""
        xml = self.get_xml()
        xml = self.patch_xml(xml)
        xml = self.render_xml(xml, context, jinja_env)
        return xml

    def map_tree(self, tree):
        """Replace the document body element with the rendered `tree`."""
        root = self.docx._element
        body = root.body
        root.replace(body, tree)

    def get_headers_footers_xml(self, uri):
        """Yield (relKey, xml_string) for each non-empty header/footer part."""
        for relKey, val in self.docx._part._rels.items():
            if (val.reltype == uri) and (val.target_part.blob):
                yield relKey, self.xml_to_string(
                    parse_xml(val.target_part.blob))

    def get_headers_footers_encoding(self, xml):
        """Return the encoding declared in the XML prolog (default utf-8)."""
        m = re.match(r'<\?xml[^\?]+\bencoding="([^"]+)"', xml, re.I)
        if m:
            return m.group(1)
        return 'utf-8'

    def build_headers_footers_xml(self, context, uri, jinja_env=None):
        """Patch + render each header/footer part; yield (relKey, bytes)."""
        for relKey, xml in self.get_headers_footers_xml(uri):
            encoding = self.get_headers_footers_encoding(xml)
            xml = self.patch_xml(xml)
            xml = self.render_xml(xml, context, jinja_env)
            yield relKey, xml.encode(encoding)

    def map_headers_footers_xml(self, relKey, xml):
        """Swap the target part of relationship `relKey` for a new part built
        from the rendered `xml`, preserving the old part's own relationships."""
        part = self.docx._part._rels[relKey].target_part
        new_part = XmlPart.load(part.partname, part.content_type, xml,
                                part.package)
        # Re-attach the old part's relationships (images, etc.) to the new part.
        for rId, rel in part.rels.items():
            new_part.load_rel(rel.reltype, rel._target, rel.rId,
                              rel.is_external)
        self.docx._part._rels[relKey]._target = new_part

    def render(self, context, jinja_env=None, autoescape=False):
        """Render body, headers and footers with `context` in place."""
        if autoescape:
            if not jinja_env:
                jinja_env = Environment(autoescape=autoescape)
            else:
                jinja_env.autoescape = autoescape
        # Body
        xml_src = self.build_xml(context, jinja_env)
        # fix tables if needed
        tree = self.fix_tables(xml_src)
        self.map_tree(tree)
        # Headers
        headers = self.build_headers_footers_xml(context, self.HEADER_URI,
                                                 jinja_env)
        for relKey, xml in headers:
            self.map_headers_footers_xml(relKey, xml)
        # Footers
        footers = self.build_headers_footers_xml(context, self.FOOTER_URI,
                                                 jinja_env)
        for relKey, xml in footers:
            self.map_headers_footers_xml(relKey, xml)

    # using of TC tag in for cycle can cause that count of columns does not
    # correspond to real count of columns in row. This function is able to fix it.
    def fix_tables(self, xml):
        """Parse rendered XML and reconcile each table's gridCol declarations
        with the actual (possibly loop-generated) cell counts; return the tree."""
        # recover=True: rendered XML may be slightly malformed; parse leniently.
        parser = etree.XMLParser(recover=True)
        tree = etree.fromstring(xml, parser=parser)
        # get namespace
        ns = '{' + tree.nsmap['w'] + '}'
        # walk trough xml and find table
        for t in tree.iter(ns + 'tbl'):
            tblGrid = t.find(ns + 'tblGrid')
            columns = tblGrid.findall(ns + 'gridCol')
            to_add = 0
            # walk trough all rows and try to find if there is higher cell count
            for r in t.iter(ns + 'tr'):
                cells = r.findall(ns + 'tc')
                if (len(columns) + to_add) < len(cells):
                    to_add = len(cells) - len(columns)
            # is neccessary to add columns?
            if to_add > 0:
                # at first, calculate width of table according to columns
                # (we want to preserve it)
                width = 0.0
                new_average = None
                for c in columns:
                    if not c.get(ns + 'w') is None:
                        width += float(c.get(ns + 'w'))
                # try to keep proportion of table
                if width > 0:
                    old_average = width / len(columns)
                    new_average = width / (len(columns) + to_add)
                    # scale the old columns
                    for c in columns:
                        c.set(
                            ns + 'w',
                            str(
                                int(
                                    float(c.get(ns + 'w')) * new_average /
                                    old_average)))
                # add new columns
                for i in range(to_add):
                    etree.SubElement(tblGrid, ns + 'gridCol',
                                     {ns + 'w': str(int(new_average))})
            # Refetch columns after columns addition.
            columns = tblGrid.findall(ns + 'gridCol')
            columns_len = len(columns)
            cells_len_max = 0

            def get_cell_len(total, cell):
                # A cell spanning N columns counts as N (via gridSpan).
                tc_pr = cell.find(ns + 'tcPr')
                grid_span = None if tc_pr is None else tc_pr.find(ns +
                                                                  'gridSpan')
                if grid_span is not None:
                    return total + int(grid_span.get(ns + 'val'))
                return total + 1

            # Calculate max of table cells to compare with `gridCol`.
            for r in t.iter(ns + 'tr'):
                cells = r.findall(ns + 'tc')
                cells_len = functools.reduce(get_cell_len, cells, 0)
                cells_len_max = max(cells_len_max, cells_len)
            to_remove = columns_len - cells_len_max
            # If after the loop, there're less columns, than
            # originally was, remove extra `gridCol` declarations.
            if to_remove > 0:
                # Have to keep track of the removed width to scale the
                # table back to its original width.
                removed_width = 0.0
                for c in columns[-to_remove:]:
                    removed_width += float(c.get(ns + 'w'))
                    tblGrid.remove(c)
                columns_left = tblGrid.findall(ns + 'gridCol')
                # Distribute `removed_width` across all columns that has
                # left after extras removal.
                extra_space = 0
                if len(columns_left) > 0:
                    extra_space = removed_width / len(columns_left)
                    extra_space = int(extra_space)
                for c in columns_left:
                    c.set(ns + 'w',
                          str(int(float(c.get(ns + 'w')) + extra_space)))
        return tree

    def new_subdoc(self, docpath=None):
        """Create a Subdoc bound to this template (insertable via {{ }})."""
        return Subdoc(self, docpath)

    @staticmethod
    def get_file_crc(file_obj):
        """Return the CRC32 (unsigned) of a path or file-like object's bytes."""
        if hasattr(file_obj, 'read'):
            buf = file_obj.read()
        else:
            with open(file_obj, 'rb') as fh:
                buf = fh.read()
        # Mask to keep the value unsigned across py2/py3.
        crc = (binascii.crc32(buf) & 0xFFFFFFFF)
        return crc

    def replace_media(self, src_file, dst_file):
        """Replace one media by another one into a docx.

        This has been done mainly because it is not possible to add images
        in docx header/footer. With this function, put a dummy picture in
        your header/footer, then specify it with its replacement in this
        function using the file path or file-like objects.

        Syntax: tpl.replace_media('dummy_media_to_replace.png','media_to_paste.jpg')
        -- or --
        tpl.replace_media(io.BytesIO(image_stream), io.BytesIO(new_image_stream))

        Note: for images, the aspect ratio will be the same as the replaced image

        Note2: it is important to have the source media file as it is required
        to calculate its CRC to find them in the docx
        """
        crc = self.get_file_crc(src_file)
        if hasattr(dst_file, 'read'):
            self.crc_to_new_media[crc] = dst_file.read()
        else:
            with open(dst_file, 'rb') as fh:
                self.crc_to_new_media[crc] = fh.read()

    def replace_pic(self, embedded_file, dst_file):
        """Replace embedded picture with original-name given by embedded_file.
        (give only the file basename, not the full path)
        The new picture is given by dst_file (either a filename or
        a file-like object)

        Notes:
        1) embedded_file and dst_file must have the same extension/format
           in case dst_file is a file-like object, no check is done on
           format compatibility
        2) the aspect ratio will be the same as the replaced image
        3) There is no need to keep the original file (this is not the case
           for replace_embedded and replace_media)
        """
        if hasattr(dst_file, 'read'):
            # NOTE: file extension not checked
            self.pic_to_replace[embedded_file] = dst_file.read()
        else:
            emp_path, emb_ext = os.path.splitext(embedded_file)
            dst_path, dst_ext = os.path.splitext(dst_file)
            if emb_ext != dst_ext:
                raise ValueError('replace_pic: extensions must match')
            with open(dst_file, 'rb') as fh:
                self.pic_to_replace[embedded_file] = fh.read()

    def replace_embedded(self, src_file, dst_file):
        """Replace one embdded object by another one into a docx.

        This has been done mainly because it is not possible to add images
        in docx header/footer. With this function, put a dummy picture in
        your header/footer, then specify it with its replacement in this
        function.

        Syntax: tpl.replace_embedded('dummy_doc.docx','doc_to_paste.docx')

        Note2 : it is important to have the source file as it is required to
        calculate its CRC to find them in the docx
        """
        with open(dst_file, 'rb') as fh:
            crc = self.get_file_crc(src_file)
            self.crc_to_new_embedded[crc] = fh.read()

    def replace_zipname(self, zipname, dst_file):
        """Replace one file in the docx file.

        First note that a MSWord .docx file is in fact a zip file.
        This method can be used to replace document embedded in the docx
        template. Some embedded document may have been modified by MSWord
        while saving the template : thus replace_embedded() cannot be used
        as CRC is not the same as the original file.

        This method works for embdded MSWord file like Excel or PowerPoint
        file, but won't work for others like PDF, Python or even Text files :
        For these ones, MSWord generate an oleObjectNNN.bin file which is
        no use to be replaced as it is encoded.

        Syntax:
        tpl.replace_zipname(
            'word/embeddings/Feuille_Microsoft_Office_Excel1.xlsx',
            'my_excel_file.xlsx')

        The zipname is the one you can find when you open docx with WinZip,
        7zip (Windows) or unzip -l (Linux). The zipname starts with
        "word/embeddings/". Note that the file is renamed by MSWord,
        so you have to guess a little bit...
        """
        with open(dst_file, 'rb') as fh:
            self.zipname_to_replace[zipname] = fh.read()

    def post_processing(self, docx_file):
        """After save(): rewrite the saved zip, substituting any registered
        media/embedded/zipname replacements entry by entry."""
        if (self.crc_to_new_media or self.crc_to_new_embedded
                or self.zipname_to_replace):
            if hasattr(docx_file, 'read'):
                # File-like destination: render the saved stream into a
                # temporary buffer, then rebuild the original stream from it.
                tmp_file = io.BytesIO()
                DocxTemplate(docx_file).save(tmp_file)
                tmp_file.seek(0)
                docx_file.seek(0)
                docx_file.truncate()
                docx_file.seek(0)
            else:
                # Path destination: move aside, then rewrite in place.
                tmp_file = '%s_docxtpl_before_replace_medias' % docx_file
                os.rename(docx_file, tmp_file)
            with zipfile.ZipFile(tmp_file) as zin:
                with zipfile.ZipFile(docx_file, 'w') as zout:
                    for item in zin.infolist():
                        buf = zin.read(item.filename)
                        if item.filename in self.zipname_to_replace:
                            zout.writestr(
                                item, self.zipname_to_replace[item.filename])
                        elif (item.filename.startswith('word/media/')
                              and item.CRC in self.crc_to_new_media):
                            zout.writestr(item,
                                          self.crc_to_new_media[item.CRC])
                        elif (item.filename.startswith('word/embeddings/')
                              and item.CRC in self.crc_to_new_embedded):
                            zout.writestr(item,
                                          self.crc_to_new_embedded[item.CRC])
                        else:
                            zout.writestr(item, buf)
            if not hasattr(tmp_file, 'read'):
                os.remove(tmp_file)
            if hasattr(docx_file, 'read'):
                docx_file.seek(0)

    def pre_processing(self):
        """Before save(): apply registered picture replacements in place."""
        if self.pic_to_replace:
            self.build_pic_map()
            # Do the actual replacement
            for embedded_file, stream in six.iteritems(self.pic_to_replace):
                if embedded_file not in self.pic_map:
                    raise ValueError(
                        'Picture "%s" not found in the docx template' %
                        embedded_file)
                self.pic_map[embedded_file][1]._blob = stream

    def build_pic_map(self):
        """Searches in docx template all the xml pictures tag and store them
        in pic_map dict"""
        if self.pic_to_replace:
            # Main document
            part = self.docx.part
            self.pic_map.update(self._img_filename_to_part(part))
            # Header/Footer
            for relid, rel in six.iteritems(self.docx.part.rels):
                if rel.reltype in (REL_TYPE.HEADER, REL_TYPE.FOOTER):
                    self.pic_map.update(
                        self._img_filename_to_part(rel.target_part))

    def get_pic_map(self):
        """Return the {picture name: (target_ref, target_part)} mapping."""
        return self.pic_map

    def _img_filename_to_part(self, doc_part):
        """Scan one document part's XML for pictures; return
        {picture name: (relationship target_ref, target_part)}."""
        et = etree.fromstring(doc_part.blob)
        part_map = {}
        gds = et.xpath('//a:graphic/a:graphicData',
                       namespaces=docx.oxml.ns.nsmap)
        for gd in gds:
            rel = None
            # Either IMAGE, CHART, SMART_ART, ...
            try:
                if gd.attrib['uri'] == docx.oxml.ns.nsmap['pic']:
                    # Either PICTURE or LINKED_PICTURE image
                    blip = gd.xpath('pic:pic/pic:blipFill/a:blip',
                                    namespaces=docx.oxml.ns.nsmap)[0]
                    dest = blip.xpath('@r:embed',
                                      namespaces=docx.oxml.ns.nsmap)
                    if len(dest) > 0:
                        rel = dest[0]
                    else:
                        continue
                else:
                    continue
                # title=inl.xpath('wp:docPr/@title',namespaces=docx.oxml.ns.nsmap)[0]
                name = gd.xpath('pic:pic/pic:nvPicPr/pic:cNvPr/@name',
                                namespaces=docx.oxml.ns.nsmap)[0]
                part_map[name] = (doc_part.rels[rel].target_ref,
                                  doc_part.rels[rel].target_part)
            # FIXME: figure out what exceptions are thrown here and catch more specific exceptions
            except Exception:
                continue
        return part_map

    def build_url_id(self, url):
        """Register `url` as an external hyperlink relationship; return its rId."""
        return self.docx._part.relate_to(url, REL_TYPE.HYPERLINK,
                                         is_external=True)

    def save(self, filename, *args, **kwargs):
        """Save the (rendered) document, applying picture replacements before
        and media/embedded/zip replacements after the underlying save."""
        self.pre_processing()
        self.docx.save(filename, *args, **kwargs)
        self.post_processing(filename)

    def get_undeclared_template_variables(self, jinja_env=None):
        """Return the set of Jinja2 variables used by body + headers/footers
        but not defined — useful to build a context checklist."""
        xml = self.get_xml()
        xml = self.patch_xml(xml)
        for uri in [self.HEADER_URI, self.FOOTER_URI]:
            for relKey, _xml in self.get_headers_footers_xml(uri):
                xml += self.patch_xml(_xml)
        if jinja_env:
            env = jinja_env
        else:
            env = Environment()
        parse_content = env.parse(xml)
        return meta.find_undeclared_variables(parse_content)

    undeclared_template_variables = property(get_undeclared_template_variables)
#%%生成文件夹 report_path = '报告输出/' os.makedirs(report_path + manager) for i in dic_target: temp = dic_target[i] if len(temp) > 0: os.makedirs(report_path + manager + '/' + i) #%%生成报告 from docx import Document for i in dic_target: temp = dic_target[i] if len(temp) > 0: report_path_f = report_path + manager + '/' + i + '/' for name in temp['名称']: Doc = Document() df_temp = temp[temp.名称 == name] Name = Doc.add_heading(name + '报告速递', level=1) stock_info = Doc.add_heading('\n个股信息:', level=2) sql = ''' \t股票上市代码为: %s \t 股票申购代码 %s \t 上市时间为: %s \t股票发行价格:%s \t \ 共有 %s 家机构做出股价预测, 预测值为 %s 元/股 '''%(df_temp.代码.values, df_temp.申购代码.values, df_temp.上市日期.values, df_temp.发行价格.values,\ df_temp.研报预测个数.values, df_temp.研报预测价格.values) stock_info.add_run(sql) stock_news = Doc.add_heading('\n个股新闻', level=2) sql = ''' \t 收集个股新闻20个,与股东相关3个,行业相关1个,有效信息 %s 个,其中正面新闻 %s 个,负面新闻 %s 个 ''' % (df_temp.正面新闻.values + df_temp.负面新闻.values,
#!/usr/bin/python # -*- coding: UTF-8 -*- from docx import Document from docx.shared import Pt from docx.oxml.ns import qn from docx.shared import Inches document = Document() document.add_heading('Report',0) paragraph = document.add_paragraph('') run = paragraph.add_run('begin: 2018-07-14 10:14:27\n') run.font.size=Pt(12) run = paragraph.add_run('end: 2018-07-14 10:15:09') run.font.size=Pt(12) document.add_heading('Testing Parameters',1) paragraph = document.add_paragraph('') run = paragraph.add_run('Concurrent threads: 10\n') run.font.size=Pt(12) run = paragraph.add_run('Url: http://10.100.34.73:8082/v1/getComparedAPI/') run.font.size=Pt(12) document.add_heading('Statistics',1) document.add_picture('/Users/miguel/out.png') paragraph = document.add_paragraph('') run = paragraph.add_run('Get Success: 479\n') run.font.size=Pt(12) run = paragraph.add_run('max:2.14 min:0.48 average:0.510\n\n') run.font.size=Pt(12) run = paragraph.add_run('[ 0.48 , 0.98 ): 475\n') run.font.size=Pt(12)
def __init__(self, tpl, docpath=None):
    # Bind this sub-document to its parent template.
    # `tpl` is the owning template object (must provide get_docx());
    # `docpath` optionally points at a .docx to load as the sub-document.
    self.tpl = tpl
    self.docx = tpl.get_docx()
    self.subdocx = Document(docpath)
    # Share the parent document's package part so relationships (images,
    # styles) added to the subdoc resolve inside the parent — presumably
    # the docxtpl Subdoc pattern; TODO confirm.
    self.subdocx._part = self.docx._part
def get_input_xls_file_content(source_file):
    """Open `source_file` as a Word document and return its paragraphs.

    Falls back to a bundled sample document when `source_file` is empty or
    None. NOTE: despite the 'xls' in the name, this reads a .docx file.
    """
    path = source_file or '/opt/techgig/MLTickets/docs/SampleInputDoc1-FAQs.docx'
    return Document(path).paragraphs
def resumeDocxConstructor():
    """Build a one-page resume .docx from JSON files in ./JSON and save it
    to ./resumeDoc/<LName>.docx.

    Reads address.json, resume.json and user.json; section order, address
    inclusion and base font are currently hard-coded (marked "Temp" below —
    intended to come from an HTTP trigger eventually, per the comments).
    """
    # Map lowercase alignment names to python-docx alignment constants.
    alignment_dict = {
        'justify': WD_PARAGRAPH_ALIGNMENT.JUSTIFY,
        'center': WD_PARAGRAPH_ALIGNMENT.CENTER,
        'right': WD_PARAGRAPH_ALIGNMENT.RIGHT,
        'left': WD_PARAGRAPH_ALIGNMENT.LEFT
    }
    document = Document()
    sections = document.sections
    # 1-inch margins on every section.
    for section in sections:
        section.top_margin = Inches(1)
        section.bottom_margin = Inches(1)
        section.left_margin = Inches(1)
        section.right_margin = Inches(1)
    #### Temp #### - Eventually this information will be passed by HTTP trigger
    resumeDirectory = f"{os.getcwd()}/JSON"
    addressDoc = f"{resumeDirectory}/address.json"
    resumeDoc = f"{resumeDirectory}/resume.json"
    userDoc = f"{resumeDirectory}/user.json"
    with open(addressDoc, "r") as read_file:
        address = json.load(read_file)
    with open(resumeDoc, "r") as read_file:
        resume = json.load(read_file)
    with open(userDoc, "r") as read_file:
        user = json.load(read_file)
    resumeOrder = [
        1, 2, 3, 4, 5
    ]  # This list will determine the order in which sections are added to the resume
    # should eventually get this from the JSON
    includeAddress = True  # This param will be given in http trigger
    font = 'Calibri'
    #### *Temp* ####

    #### Define Functions ####
    # add_content function adapted From https://stackoverflow.com/questions/48150222/changing-paragraph-formatting-in-python-docx
    # Adds one paragraph with a freshly registered, uniquely named style.
    # NOTE(review): every call registers a new style via add_style(), so
    # style_name must be unique per call or python-docx will raise.
    def add_content(content,
                    space_after,
                    font_name=font,
                    font_size=16,
                    line_spacing=0,
                    space_before=0,
                    align='left',
                    keep_together=True,
                    keep_with_next=False,
                    page_break_before=False,
                    widow_control=False,
                    set_bold=False,
                    set_italic=False,
                    set_underline=False,
                    set_all_caps=False,
                    style_name="",
                    firstline_indent=0.0,
                    left_indent=0.0):
        paragraph = document.add_paragraph(content)
        paragraph.style = document.styles.add_style(style_name,
                                                    WD_STYLE_TYPE.PARAGRAPH)
        # This `font` is the style's font object (local; shadows the outer
        # default-font string only inside this function body).
        font = paragraph.style.font
        font.name = font_name
        font.size = Pt(font_size)
        font.bold = set_bold
        font.italic = set_italic
        font.all_caps = set_all_caps
        font.underline = set_underline
        paragraph_format = paragraph.paragraph_format
        # Unknown alignment strings yield None (python-docx inherits).
        paragraph_format.alignment = alignment_dict.get(align.lower())
        paragraph_format.space_before = Pt(space_before)
        paragraph_format.space_after = Pt(space_after)
        paragraph_format.line_spacing = line_spacing
        paragraph_format.keep_together = keep_together
        paragraph_format.keep_with_next = keep_with_next
        paragraph_format.page_break_before = page_break_before
        paragraph_format.widow_control = widow_control
        paragraph_format.first_line_indent = Inches(firstline_indent)
        paragraph_format.left_indent = Inches(left_indent)

    def generateHeader(
    ):  # Name, Address, Contact Info, *perhaps should add websites here*
        # Insert Name
        add_content(f"{user['FName']} {user['LName']}",
                    align='Center',
                    space_before=0,
                    space_after=0,
                    line_spacing=1,
                    font_size=16,
                    set_bold=True,
                    set_all_caps=True,
                    style_name="NameBold",
                    firstline_indent=0.0,
                    left_indent=0.0)
        # Insert Address
        if includeAddress:
            # Only show the country when it isn't the USA.
            if address['Address']['Country'] != 'USA':
                country = address['Address']['Country']
            else:
                country = ""
            add_content(
                f"{address['Address']['Line1']} {address['Address']['Line2']}\n"
                f"{address['Address']['City']}, {address['Address']['State']} {address['Address']['Zip']} {country}",
                align='Center',
                space_before=0,
                space_after=0,
                line_spacing=1,
                font_size=12,
                set_bold=False,
                set_all_caps=False,
                style_name="addressNotBold",
                firstline_indent=0.0,
                left_indent=0.0)
        # Insert Contact Info
        add_content(f"Phone: {user['Phone']}\n"
                    f"Email: {user['Email']}",
                    align='Center',
                    space_before=0,
                    space_after=0,
                    line_spacing=1,
                    font_size=12,
                    set_bold=False,
                    set_all_caps=False,
                    style_name="contactNotBold",
                    firstline_indent=0.0,
                    left_indent=0.0)

    def generateObjective():
        # Insert Objective Statement
        add_content(f"\nObjective:",
                    align='Left',
                    space_before=0,
                    space_after=0,
                    line_spacing=1,
                    font_size=14,
                    set_bold=True,
                    set_all_caps=False,
                    style_name="ObjectiveBold",
                    firstline_indent=0.0,
                    left_indent=0.0)
        add_content(f"{resume['Objective Statement']}\n",
                    align='Left',
                    space_before=0,
                    space_after=0,
                    line_spacing=1,
                    font_size=12,
                    set_bold=False,
                    set_all_caps=False,
                    style_name="ObjectiveStatement",
                    firstline_indent=0.0,
                    left_indent=0.5)

    def generateEducation():
        # Insert Education
        add_content(f"Education:",
                    align='Left',
                    space_before=0,
                    space_after=0,
                    line_spacing=1,
                    font_size=14,
                    set_bold=True,
                    set_all_caps=False,
                    style_name="EducationBold",
                    firstline_indent=0.0,
                    left_indent=0.0)
        # `j` suffixes each style name to keep it unique per school entry.
        j = 0
        for i in resume['School']:
            add_content(
                f"\t{i['Name']} - {i['City']}, {i['State']}, {i['Country']}\n"
                f"\tMajor: {i['Major']}\t\tGraduation: {i['Graduation']}\n"
                f"\tMinor: {i['Minor']}\n"
                f"\tGPA: {i['Gpa']}\n",
                align='Left',
                space_before=0,
                space_after=0,
                line_spacing=1,
                font_size=12,
                set_bold=False,
                set_all_caps=False,
                style_name=f"ObjectiveStatement{j}",
                firstline_indent=0.0,
                left_indent=0.0)
            j += 1

    def generateCoursework():
        # Insert Relevant Coursework
        add_content(f"Relevant Coursework:",
                    align='Left',
                    space_before=0,
                    space_after=0,
                    line_spacing=1,
                    font_size=14,
                    set_bold=True,
                    set_all_caps=False,
                    style_name="CourseworkBold",
                    firstline_indent=0.0,
                    left_indent=0.0)
        j = 0
        for i in resume['RelevantCourse']:
            add_content(f"\t+ {i['Name']}",
                        align='Left',
                        space_before=0,
                        space_after=0,
                        line_spacing=1,
                        font_size=12,
                        set_bold=False,
                        set_all_caps=False,
                        style_name=f"CourseName{j}",
                        firstline_indent=0.0,
                        left_indent=0.0)
            add_content(f"{i['Description']}",
                        align='Left',
                        space_before=0,
                        space_after=0,
                        line_spacing=1,
                        font_size=10,
                        set_bold=False,
                        set_all_caps=False,
                        style_name=f"CourseDes{j}",
                        firstline_indent=0.0,
                        left_indent=1.0)
            j += 1
        # Trailing blank paragraph to separate from the next section.
        add_content(f"",
                    align='Left',
                    space_before=0,
                    space_after=0,
                    line_spacing=1,
                    font_size=14,
                    set_bold=True,
                    set_all_caps=False,
                    style_name="CourseworkBlankLine",
                    firstline_indent=0.0,
                    left_indent=0.0)

    def generateSkills(
    ):  # Should experience level be included or is that more for analysis
        # Insert Skills - This section is just bad and needs work, might be worth looking at tables
        add_content(f"Skills:",
                    align='Left',
                    space_before=0,
                    space_after=0,
                    line_spacing=1,
                    font_size=14,
                    set_bold=True,
                    set_all_caps=False,
                    style_name="SkillsBold",
                    firstline_indent=0.0,
                    left_indent=0.0)
        # resume['Skill'] appears to be a nested mapping: category -> group
        # -> {skill: level} — TODO confirm against the JSON schema.
        for i in resume['Skill']:
            add_content(f"\t{i}",
                        align='Left',
                        space_before=0,
                        space_after=0,
                        line_spacing=1,
                        font_size=12,
                        set_bold=False,
                        set_all_caps=False,
                        style_name=f"{i}",
                        firstline_indent=0.0,
                        left_indent=0.0)
            for j in resume['Skill'][i]:
                for k in resume['Skill'][i][j]:
                    add_content(f"\t\t{k} - {resume['Skill'][i][j][k]}",
                                align='Left',
                                space_before=0,
                                space_after=0,
                                line_spacing=1,
                                font_size=10,
                                set_bold=False,
                                set_all_caps=False,
                                style_name=f"{k}",
                                firstline_indent=0.0,
                                left_indent=0.0)

    def generateExperience():
        # Not implemented yet.
        return

    def generateActivities():
        # Insert Activties
        # NOTE(review): 'Activties' spelling matches the JSON key — do not
        # "fix" one without the other.
        add_content(f"Activties:",
                    align='Left',
                    space_before=0,
                    space_after=0,
                    line_spacing=1,
                    font_size=14,
                    set_bold=True,
                    set_all_caps=False,
                    style_name="ActivtiesBold",
                    firstline_indent=0.0,
                    left_indent=0.0)
        j = 0
        for i in resume['Activties']:
            add_content(f"\t+ {i['Name']}",
                        align='Left',
                        space_before=0,
                        space_after=0,
                        line_spacing=1,
                        font_size=12,
                        set_bold=False,
                        set_all_caps=False,
                        style_name=f"ActivitiesName{j}",
                        firstline_indent=0.0,
                        left_indent=0.0)
            add_content(f"{i['Description']}",
                        align='Left',
                        space_before=0,
                        space_after=0,
                        line_spacing=1,
                        font_size=10,
                        set_bold=False,
                        set_all_caps=False,
                        style_name=f"ActivityDes{j}",
                        firstline_indent=0.0,
                        left_indent=1.0)
            j += 1

    # Dispatch table; resumeOrder selects which sections run and in what
    # order. NOTE(review): generateExperience/generateActivities are defined
    # but not reachable through this table.
    functionDic = {
        1: generateHeader,
        2: generateObjective,
        3: generateEducation,
        4: generateCoursework,
        5: generateSkills
    }
    for x in resumeOrder:
        functionDic[x]()
    # Save file to a /local directory
    os.chdir(f"{os.getcwd()}/resumeDoc")
    document.save(f"{user['LName']}.docx")
# -*- coding: utf-8 -*-
"""Print the text of every text-bearing shape in a PowerPoint deck, then
save an (empty) Word document."""
from multiprocessing import Process
from pptx import Presentation
from docx import Document
import sys
import importlib

importlib.reload(sys)  # legacy py2-era idiom kept from the original script

out_doc = Document()
# Path of the source presentation.
deck = Presentation('1030.pptx')

# Walk every slide / shape and dump paragraph text to stdout.
for slide in deck.slides:
    for shape in slide.shapes:
        if not shape.has_text_frame:
            continue
        for para in shape.text_frame.paragraphs:
            print(para.text)

out_doc.save('text.docx')
# Python 2 script (raw_input, print statement): reads class statistics from
# a Microsoft Access database via ODBC and writes them into a Word document.
import pyodbc
from docx import Document
import io
from PIL import Image
import sys

# Ask for the .mdb/.accdb file (relative to the current directory).
filename = raw_input("Path to Access Database(eg Database.mdb): ")
con = pyodbc.connect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};Dbq=./"+filename+";Uid=;Pwd=;")
cursor = con.cursor()
output = Document()

# Placeholder SQL — must be replaced with the real query for classes.
cursor.execute('sql to select the class in the db')
classFrequecy = cursor.fetchall()
print "Writing Classes Table"
# Each row: (class name, total count) — presumably; TODO confirm schema.
for classStd in classFrequecy:
    if classStd[0] is None:
        continue
    p = output.add_paragraph()
    p.add_run(classStd[0])
    # NOTE(review): underline is set on the paragraph object, not the run —
    # probably intended to be the run returned by add_run().
    p.underline = True
    p.add_run("\n")
    p.add_run("Total: " + str(classStd[1]))

# Placeholder SQL for the per-class student listing.
cursor.execute("sql to get the students per class")
students = cursor.fetchall()
def __init__(self, Document, rows, cols, style):
    # Wrap a new table added to the given python-docx document instance.
    # NOTE(review): the `Document` parameter is a document *object* that
    # shadows the python-docx Document class name, and `style` is accepted
    # but never applied to the table — both look like candidates for cleanup,
    # but renaming/removing them would change the visible signature.
    self.table = Document.add_table(rows, cols)
def main():
    """Create invitations.docx: one styled invitation page for every guest
    listed (one name per line) in ./guests.txt."""
    from docx import Document
    from docx.enum.style import WD_STYLE_TYPE
    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
    from docx.shared import Pt

    INTRO = "It would be a pleasure to have the company of"
    ADDRESS = "at 11010 Memory Lane on the Evening of"
    DATE = "April 1st"
    TIME = "at 7 o'clock"

    # Load the guest list.
    with open("./guests.txt") as file:
        guests = file.read().splitlines()

    doc = Document()
    styles = doc.styles

    # "Script" — bold, all-caps calligraphic text, centered.
    script_style = styles.add_style("Script", WD_STYLE_TYPE.PARAGRAPH)
    script_style.font.name = "MathJax_Caligraphic"  # Only script font in default Ubuntu 18.04
    script_style.font.size = Pt(14)
    script_style.font.all_caps = True  # Script effect only applies in caps for above font
    script_style.font.bold = True
    script_style.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    # "Name" — large bold sans-serif for the guest's name, centered.
    name_style = styles.add_style("Name", WD_STYLE_TYPE.PARAGRAPH)
    name_style.font.name = "Liberation Sans"  # Font in default Ubuntu 18.04
    name_style.font.size = Pt(20)
    name_style.font.bold = True
    name_style.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    # "Date" — medium sans-serif, centered.
    date_style = styles.add_style("Date", WD_STYLE_TYPE.PARAGRAPH)
    date_style.font.name = "Liberation Sans"
    date_style.font.size = Pt(14)
    date_style.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    # One invitation page per guest, ending with a page break.
    for guest in guests:
        for text, style_name in ((INTRO, "Script"), (guest, "Name"),
                                 (ADDRESS, "Script"), (DATE, "Date"),
                                 (TIME, "Script")):
            doc.add_paragraph(text, style=style_name)
        doc.add_page_break()

    doc.save("invitations.docx")
def __init__(self):
    # Start from a fresh, empty python-docx Document; all further content
    # is added to this instance attribute.
    self.document = Document()
def generadorDocumento(ecs, matricessd, matricescd, soluciones, ecsresueltas, datos, desv):
    """Build 'aproximacion.docx', a function-approximation report.

    Assumed inputs (TODO confirm against callers):
        ecs: the user-entered candidate equations (sympy-like objects).
        matricessd / matricescd: unsolved / solved normal-equation matrices,
            one per equation.
        soluciones: per-equation mapping of coefficient -> value.
        ecsresueltas: the fitted equations with coefficients substituted.
        datos: table with 'X' and 'Y' columns of the input points.
        desv: per-equation deviation (fit error) values.
    """
    document = Document()
    document.add_heading('Aproximación de Funciones', level=0)
    document.add_heading('Datos de Entrada', level=1)
    document.add_paragraph()
    # ----- Build the input-data table -----
    anchoCeldas = Inches(.75)
    table = document.add_table(rows=1, cols=2, style='Table Grid')
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    hdr_cells = table.rows[0].cells
    # Grey shading for the header cells (raw OOXML, one element per cell).
    sombreado1 = parse_xml(r'<w:shd {} w:fill="D9D9D9"/>'.format(nsdecls('w')))
    sombreado2 = parse_xml(r'<w:shd {} w:fill="D9D9D9"/>'.format(nsdecls('w')))
    hdr_cells[0].text = 'X'
    hdr_cells[1].text = 'Y'
    hdr_cells[0].width = anchoCeldas
    hdr_cells[1].width = anchoCeldas
    hdr_cells[0]._tc.get_or_add_tcPr().append(sombreado1)
    hdr_cells[1]._tc.get_or_add_tcPr().append(sombreado2)
    hdr_cells[0].paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    hdr_cells[1].paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    # One row per data point, right-aligned.
    for a in range(len(datos)):
        row_cells = table.add_row().cells
        row_cells[0].text = str(datos['X'][a])
        row_cells[1].text = str(datos['Y'][a])
        row_cells[0].width = anchoCeldas
        row_cells[1].width = anchoCeldas
        row_cells[0].paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
        row_cells[1].paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
    # ----- List the equations entered by the user -----
    document.add_paragraph()
    p = document.add_paragraph()
    p.add_run("Ecuaciones planteadas:").bold = True
    for a in range(len(ecs)):
        p = document.add_paragraph()
        p.add_run('F' + str(a + 1) + '(x) = ').bold = True
        # Pretty-print: ** -> ^ and log -> ln for display.
        p.add_run(str(ecs[a]).replace("**", "^").replace('log', 'ln')).italic = True
    # ----- Per-equation results -----
    c = 0  # 1-based equation counter
    for a in ecs:
        c += 1
        document.add_page_break()
        document.add_heading('Función ' + str(c), level=0)
        p = document.add_paragraph()
        p.add_run('F' + str(c) + '(x) = ').bold = True
        p.add_run(str(a).replace("**", "^").replace('log', 'ln')).italic = True
        document.add_paragraph()
        p = document.add_paragraph()
        p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        p.add_run("Matriz sin resolver").bold = True
        # Unsolved normal-equation matrix, rendered symbolically.
        msr = matrizInvertidaDiagonalConCs(matricessd[c - 1])
        temp = pd.DataFrame(msr)
        anchoCeldas = Inches(.75)
        table = document.add_table(rows=0, cols=len(temp.columns), style='Table Grid')
        table.alignment = WD_TABLE_ALIGNMENT.CENTER
        for fila in range(len(temp)):
            row_cells = table.add_row().cells
            for columna in range(len(temp.columns)):
                texto = str(temp[temp.columns[columna]][fila]).replace(
                    "**", "^").replace('log', 'ln')
                if columna == len(temp.columns) - 2:
                    # Shade the coefficient column (second to last).
                    sombreado = parse_xml(
                        r'<w:shd {} w:fill="D9D9D9"/>'.format(nsdecls('w')))
                    row_cells[columna]._tc.get_or_add_tcPr().append(sombreado)
                else:
                    # A literal 1 stands for n (the point count); everything
                    # else is a summation term, so prefix with sigma.
                    if texto == "1":
                        texto = "n"
                    else:
                        texto = "Σ" + texto
                row_cells[columna].text = texto
                row_cells[columna].width = anchoCeldas
        document.add_paragraph()
        p = document.add_paragraph()
        p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        p.add_run("Matriz resuelta").bold = True
        # Solved matrix: numeric values, same shading for the C column.
        msr = matrizInvertidaDiagonalConCs(matricescd[c - 1])
        temp = pd.DataFrame(msr)
        anchoCeldas = Inches(.75)
        table = document.add_table(rows=0, cols=len(temp.columns), style='Table Grid')
        table.alignment = WD_TABLE_ALIGNMENT.CENTER
        for fila in range(len(temp)):
            row_cells = table.add_row().cells
            for columna in range(len(temp.columns)):
                row_cells[columna].text = str(
                    temp[temp.columns[columna]][fila])
                row_cells[columna].width = anchoCeldas
                if columna == len(temp.columns) - 2:
                    sombreado = parse_xml(
                        r'<w:shd {} w:fill="D9D9D9"/>'.format(nsdecls('w')))
                    row_cells[columna]._tc.get_or_add_tcPr().append(sombreado)
        document.add_paragraph()
        document.add_paragraph()
        p = document.add_paragraph()
        p.add_run("Conjunto de Soluciones: ").bold = True
        # Coefficient values, listed in reverse key order.
        for i in reversed(soluciones[c - 1]):
            p = document.add_paragraph()
            p.add_run(str(i) + ': ').bold = True
            p.add_run(str(soluciones[c - 1][i]))
        document.add_paragraph()
        p = document.add_paragraph()
        p.add_run("Ecuacion final: ").bold = True
        p.add_run(
            str(ecsresueltas[c - 1]).replace("**", "^").replace('log', 'ln'))
        p = document.add_paragraph()
        p.add_run("Desviacion: ").bold = True
        p.add_run(str(desv[c - 1]))
    # ----- Combined chart of all fitted equations -----
    # (assumes the next two statements sit after the loop — original
    # indentation was lost; TODO confirm)
    document.add_page_break()
    document.add_heading("Gráficos", level=0)
    # Chart
    plt.scatter(datos['X'], datos['Y'], alpha=0.5)
    # 100-step grid across the X range.
    x = np.arange(datos['X'].min(), datos['X'].max(),
                  (datos['X'].max() - datos['X'].min()) / 100)
    numeroEcuacion = 0
    leyendas = []
    for ecuacion in ecsresueltas:
        numeroEcuacion += 1
        # NOTE(review): eval of the equation text; msin/mcos/mln are
        # presumably module-level vectorized helpers — confirm. eval on
        # non-trusted input would be unsafe.
        y = eval(
            str(ecuacion).replace("sin", "msin").replace("cos", "mcos").replace(
                "log", "mln"))
        plt.plot(x, y, label=str(ecuacion))
        leyendas.append("Ecuación " + str(numeroEcuacion))
    plt.legend(leyendas)
    # Render the figure into memory and embed it.
    diagramaDispersion = BytesIO()
    plt.savefig(diagramaDispersion)
    document.add_picture(diagramaDispersion, width=Inches(6))
    plt.clf()
    last_paragraph = document.paragraphs[-1]
    last_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    # ----- One chart per fitted equation -----
    numeroEcuacion = 0
    for ecuacion in ecsresueltas:
        plt.scatter(datos['X'], datos['Y'], alpha=0.5)
        numeroEcuacion += 1
        y = eval(
            str(ecuacion).replace("sin", "msin").replace("cos", "mcos").replace(
                "log", "mln"))
        plt.plot(x, y, label=str(ecuacion))
        leyendas = ["Ecuación " + str(numeroEcuacion)]
        plt.legend(leyendas)
        diagramaDispersion = BytesIO()
        plt.savefig(diagramaDispersion)
        document.add_picture(diagramaDispersion, width=Inches(5))
        plt.clf()
        last_paragraph = document.paragraphs[-1]
        last_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    # ----- Conclusion: best and worst fit -----
    document.add_page_break()
    document.add_heading("Conclusión", level=0)
    p = document.add_paragraph()
    p.add_run('La función con menor desviación (óptima) es la ')
    # Index of the smallest deviation.
    optima = min(range(len(desv)), key=desv.__getitem__)
    p.add_run('Función ' + str(optima + 1) + ":").bold = True
    p = document.add_paragraph()
    p.add_run('F' + str(optima + 1) + "(x): ").bold = True
    p.add_run(str(ecs[optima]).replace("**", "^").replace('log', 'ln'))
    p = document.add_paragraph()
    p.add_run('F' + str(optima + 1) + "(x): ").bold = True
    p.add_run(
        str(ecsresueltas[optima]).replace("**", "^").replace('log', 'ln'))
    p = document.add_paragraph()
    p.add_run("Desviación: ").bold = True
    p.add_run(str(desv[optima]))
    plt.scatter(datos['X'], datos['Y'], alpha=0.5)
    y = eval(
        str(ecsresueltas[optima]).replace("sin", "msin").replace(
            "cos", "mcos").replace("log", "mln"))
    plt.plot(x, y)
    leyendas = ["Ecuación " + str(optima + 1)]
    plt.legend(leyendas)
    diagramaDispersion = BytesIO()
    plt.savefig(diagramaDispersion)
    document.add_picture(diagramaDispersion, width=Inches(3.5))
    plt.clf()
    last_paragraph = document.paragraphs[-1]
    last_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    # Worst fit (largest deviation); note `optima` is reused here.
    p = document.add_paragraph()
    p.add_run('La función con mayor desviación (peor función) es la ')
    optima = max(range(len(desv)), key=desv.__getitem__)
    p.add_run('Función ' + str(optima + 1) + ":").bold = True
    p = document.add_paragraph()
    p.add_run('F' + str(optima + 1) + "(x): ").bold = True
    p.add_run(str(ecs[optima]).replace("**", "^"))
    p = document.add_paragraph()
    p.add_run('F' + str(optima + 1) + "(x): ").bold = True
    p.add_run(str(ecsresueltas[optima]).replace("**", "^"))
    p = document.add_paragraph()
    p.add_run("Desviación: ").bold = True
    p.add_run(str(desv[optima]))
    plt.scatter(datos['X'], datos['Y'], alpha=0.5)
    y = eval(
        str(ecsresueltas[optima]).replace("sin", "msin").replace(
            "cos", "mcos").replace("log", "mln"))
    plt.plot(x, y)
    leyendas = ["Función " + str(optima + 1)]
    plt.legend(leyendas)
    diagramaDispersion = BytesIO()
    plt.savefig(diagramaDispersion)
    document.add_picture(diagramaDispersion, width=Inches(3.5))
    plt.clf()
    last_paragraph = document.paragraphs[-1]
    last_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    document.save('aproximacion.docx')
def format_docx_file(request):
    """Open the student's uploaded report, apply Times New Roman 11 pt as the
    base ('Normal') style, prepend a UW cover page, and save the result under
    workshop/output as "<student> - <subject>.docx"."""
    # Pull the request fields we need.
    name = request["studentName"]
    topic = request["subject"]
    dept = full_department_names[request["department"]]  # full department name
    street = request["street"]
    postal = request["postal"]
    source_name = request["reportFile"]

    # Load the uploaded file from the temp workshop area.
    source_path = os.path.join("workshop", "tmp", source_name)
    doc = Document(source_path)

    # Base style for the whole document.
    normal_font = doc.styles['Normal'].font
    normal_font.name = 'Times New Roman'
    normal_font.size = Pt(11)

    # ---- Cover page ----
    # University logo, then the department name.
    doc.add_picture(path_to_waterloo_logo)
    doc.add_paragraph(dept + "\n\n")

    # Title: bold, 14 pt.
    heading_run = doc.add_paragraph().add_run(topic + "\n\n")
    heading_run.bold = True
    heading_run.font.size = Pt(14)

    # "Prepared for" block.
    for_par = doc.add_paragraph()
    for_par.add_run("A Report Prepared For:\n").bold = True
    for_par.add_run("The University of Waterloo\n")

    # "Prepared by" block: student name and address lines.
    by_par = doc.add_paragraph()
    by_par.add_run("Prepared By:\n").bold = True
    for line in (name, street, postal):
        by_par.add_run(line + "\n")

    doc.add_page_break()  # cover page ends here

    # ---- Save ----
    out_name = "{} - {}.docx".format(name, topic)
    doc.save(os.path.join("workshop", "output", out_name))
from docx import Document
from docx.shared import Cm
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.style import WD_STYLE_TYPE
from docx.shared import Pt
from docx.shared import Inches
import numpy as np
import pandas as pd
from docx.oxml.ns import qn
import datetime

# --- Report parameters ---
com_name = '嘉名染整'  # company name
transformer = '地下室 1#'  # transformer name
now = datetime.datetime.now()  # report timestamp

document = Document()

# --- Styles / document body ---
# Push the logo down the page with five empty paragraphs.
for i in range(0, 5):
    document.add_paragraph('')
# Logo sizes: Nande Electric 1.82x6.07 cm, Power Guard 3.43x9.83 cm.
pic = document.add_picture('./pic/poweryun.png',
                           height=Cm(3.43),
                           width=Cm(9.83))
# Center the paragraph that holds the picture.
last_paragraph = document.paragraphs[-1]
last_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
# The built-in Heading 1 style cannot be overridden directly, so register
# a custom paragraph style instead.
# BUG FIX: the original referenced an undefined name `styles` (NameError);
# the styles collection lives on the document object.
new_heading_style = document.styles.add_style('New Heading',
                                              WD_STYLE_TYPE.PARAGRAPH)
def main():
    """PGS report pipeline entry point: parses the patient info sheet, CNV
    results and QC stats, then writes three outputs into --out_dir: a CNV
    graph .docx, a sex-info .xlsx, and a templated report .docx."""
    ### MARK1: parse command-line arguments
    parser = argparse.ArgumentParser(
        prog='PGS_report',
        description=
        'Save PGS project\'s graph (.png) file to a (.docx) word file and sex info file to an excel (.xlsx) file and write a report based on word template to a (.docx) file. '
    )
    parser.add_argument(
        '--project_type',
        action="store",
        required=False,
        default="PGS",
        choices=["PGS", "ChromInst", "ONCPGD", "ONPGS", "CPGD", "IBPGS"],
        help="The Project type. [default=PGS]")
    parser.add_argument('--project_dir',
                        action="store",
                        required=True,
                        help='The project output directory')
    parser.add_argument('--bin_size',
                        action="store",
                        required=True,
                        help='The bin size, like 1000K. ')
    parser.add_argument(
        '--cnv_file',
        action="store",
        required=True,
        help=
        'The call cnv file created by pipeline. It is used to create the sex info excel file and PGS test results are extracted from it.'
    )
    parser.add_argument('--patient_info',
                        action="store",
                        required=True,
                        help='The patient info txt file')
    parser.add_argument(
        '--out_dir',
        action="store",
        required=True,
        help='The output graph/info/report files\'s directory ')
    parser.add_argument('--data_sts',
                        action="store",
                        required=True,
                        help="data.sts file")
    args = parser.parse_args()
    project_type = args.project_type
    project_dir = os.path.abspath(args.project_dir)
    bin_size = args.bin_size
    cnv_file = os.path.abspath(args.cnv_file)
    patient_info = os.path.abspath(args.patient_info)
    out_dir = os.path.abspath(args.out_dir)
    data_sts = os.path.abspath(args.data_sts)
    ### MARK2: parse the input files into variables
    logger = create_logging('PGS report logger')
    ## MARK2.1: parse the submission sheet (patient_info)
    dict_patient_info, dict_sample_info, sample_barcodes = parse_patient_info(
        patient_info)
    logger.info("DONE:parse patient info")
    # MARK2.1.1: replace placeholder missing values ("click here to enter
    # text") with empty strings
    for (k, v) in dict_patient_info.items():
        if v in MISSING_VALUES:
            dict_patient_info[k] = ''
    logger.info("DONE:clean patient info")
    # supplementary info extraction
    ###### read qc_discription.txt: barcode -> QC value (column 11)
    # NOTE(review): fh_sts is never closed — consider a `with` block.
    dict_qc = {}
    fh_sts = open(data_sts)
    for line in fh_sts.readlines():
        if line.split():
            line = line.rstrip()
            list_sts = line.split("\t")
            dict_qc[list_sts[0]] = list_sts[10]
    logger.info("DONE:parse data.sts")
    ## MARK2.2: parse the CNV result file
    result_barcodes, dict_result, dict_result_gender = parse_cnv_file(
        cnv_file, dict_qc)
    logger.info("DONE:parse cnv file")
    ## MARK2.3: locate the CNV plot images
    dict_red_blue_png_no_XY = parse_graph(project_dir, 'graph',
                                          'with_chrID_no_XY', result_barcodes,
                                          bin_size)
    dict_red_blue_png_XY = parse_graph(project_dir, 'graph',
                                       'with_chrID_with_XY', result_barcodes,
                                       bin_size)
    dict_colorful_png_XY = parse_graph(project_dir, 'graph1',
                                       'with_chrID_with_XY', result_barcodes,
                                       bin_size)
    logger.info("DONE:parse png graph")
    ### MARK3: derive internal variables
    ## MARK (added 2018/3/1): merge submitted and result barcodes into the
    ## final list of barcodes the report covers
    report_barcodes = gen_report_barcodes(sample_barcodes, result_barcodes)
    ## MARK3.1: project name, hospital name, woman's name, logo flag
    # sample_sheet_ID: project name
    sample_sheet_ID = dict_patient_info["ProjectID"]
    if sample_sheet_ID in MISSING_VALUES:
        logger.error("Project ID is missing!")
        exit(1)
    # hospital_ID: hospital name (4th '_'-separated field of the project ID)
    array_sample_sheet_ID = sample_sheet_ID.split('_')
    hospital_ID = array_sample_sheet_ID[3]
    # woman_name: the woman's name (control runs get a fixed label)
    woman_name = dict_patient_info['WomanName']
    if 'Control' in sample_sheet_ID or 'control' in sample_sheet_ID:
        woman_name = 'Control'
    # if_logo: whether the report template should carry a logo
    if_logo = dict_patient_info['Template']
    ## MARK3.2: full output base name
    out_name = "Project_" + sample_sheet_ID + u"(" + woman_name + u")"
    if woman_name == '':
        out_name = "Project_" + sample_sheet_ID
    ### MARK4: emit the outputs
    #######################
    ## MARK4.1: full CNV graph document
    # CNV graph document name
    out_graph = out_dir + '/' + out_name + u"CNV全图.docx"
    document = Document()
    for barcode in report_barcodes:
        png_fullpath = dict_colorful_png_XY[barcode]
        if os.path.exists(png_fullpath):
            document.add_picture(png_fullpath, width=Inches(6.35))
        else:
            log = "save graph file : " + png_fullpath + " does not exist!"
            logger.warning(log)
    document.save(out_graph)
    logger.info("DONE:save graph file")
    ############################################################
    ## MARK4.2: sex info spreadsheet
    # sex info file name
    out_info = out_dir + '/' + out_name + "info.xlsx"
    wb = Workbook()
    ws = wb.active
    ws.title = "info"
    excel_row = 1
    # One row per barcode: barcode, sample id (falls back to the barcode),
    # and called gender.
    for barcode in report_barcodes:
        ws.cell(row=excel_row, column=1, value=barcode)
        ws.cell(row=excel_row,
                column=2,
                value=dict_sample_info.get(barcode, barcode))
        ws.cell(row=excel_row, column=3, value=dict_result_gender[barcode])
        excel_row += 1
    ws = adjustColumnWidth(ws)
    wb.save(out_info)
    logger.info("DONE:save sex info xlsx file")
    ####################
    ## MARK4.3: generate the report
    ## MARK: report name (per project type)
    project_type_name = ''
    if project_type == 'ONCPGD' or project_type == 'ONPGS':
        project_type_name = "24h-胚胎染色体拷贝数检测报告单"
    elif project_type == 'CPGD':
        project_type_name = "MALBAC-PGD™ 染色体病胚胎植入前遗传学诊断报告单"
    elif project_type == 'PGS' or project_type == 'IBPGS':
        project_type_name = "胚胎植入前遗传学筛查(PGS)检测报告单"
    else:
        project_type_name = "ChromInst 9h-胚胎染色体拷贝数检测报告单"
    out_report = out_dir + '/' + out_name + project_type_name + ".docx"
    ## MARK: report template selection
    if project_type == 'ONCPGD' or project_type == 'ONPGS':
        report_temp = ONCPGD_REPORT_TEMPLATE
    elif project_type == 'CPGD':
        report_temp = CPGD_REPORT_TEMPLATE
    else:
        if if_logo == "Yes" or if_logo == "yes":
            report_temp = PGS_REPORT_TEMPLATE
        else:
            report_temp = PGS_REPORT_TEMPLATE_WITHOUT_LOGO
    # MARK4.3.1: per-hospital customisation
    # some hospitals require the hospital name to be omitted
    if hospital_ID in HOSPITAL_ID_WITHOUT_HOSPITAL_NAME:
        dict_patient_info["SubmissionOrganization"] = ''
    # for ONCPGD/ONPGS the biopsy date goes into the submission-date slot
    if project_type == 'ONCPGD' or project_type == 'ONPGS':
        dict_patient_info["SubmissionDate"] = dict_patient_info["BiopsyDate"]
    ### MARK4.3.4: copy the template and fill in the report
    shutil.copyfile(report_temp, out_report)
    ReportTML = DocxTemplate(out_report)
    ## report: submission info
    # report date, zero-padded month/day
    now = datetime.datetime.now()
    month = now.month
    day = now.day
    if month < 10:
        month = '0' + str(month)
    if day < 10:
        day = '0' + str(day)
    report_date = u"{}年{}月{}日".format(now.year, month, day)
    context = dict_patient_info
    context['ReportDate'] = report_date
    ## report: karyotype results table
    result = []
    for barcode in report_barcodes:
        tmp_dict = {
            'sample_id': dict_sample_info.get(barcode, barcode),
            'sample_barcode': barcode,
            'test_result': dict_result[barcode]
        }
        result.append(tmp_dict)
    context['result'] = result
    ## report: pictures
    # choose which red/blue plot each barcode uses in the report:
    # normal gender call or N/A result -> plot without sex chromosomes,
    # otherwise the plot including X/Y.
    dict_report_png = {}
    report_png = ''
    for barcode in result_barcodes:
        red_blue_no_XY = dict_red_blue_png_no_XY[barcode]
        red_blue_XY = dict_red_blue_png_XY[barcode]
        if dict_result_gender[barcode] == "XX" or dict_result_gender[
                barcode] == "XY":
            report_png = red_blue_no_XY
        elif dict_result[barcode] == "N/A":
            report_png = red_blue_no_XY
        else:
            report_png = red_blue_XY
        dict_report_png[barcode] = report_png
    subdoc_picture = ReportTML.new_subdoc()
    for barcode in report_barcodes:
        doc_png = dict_report_png[barcode]
        if os.path.exists(doc_png):
            subdoc_picture.add_picture(doc_png, width=Inches(6.1))
        else:
            log = "save report file: " + doc_png + " does not exist!"
            logger.warning(log)
    context['subdoc_picture'] = subdoc_picture
    # render and save; report complete
    ReportTML.render(context)
    ReportTML.save(out_report)
    logger.info("DONE:save report file")
# Interactive resume builder: prompts on stdin and assembles a .docx.
from docx import Document
document = Document()
# section: user basic info.
name = input('Enter your full name: ')
document.add_heading(name)
user_info = document.add_paragraph()
address = input('Enter your address: ')
user_info.add_run(address + '\n').italic = True
email = input('Enter your email: ')
user_info.add_run(email + '\n')
phone_number = input('Enter your phone number: ')
user_info.add_run(phone_number)
# section: user bio.
document.add_heading('Career Profile')
user_bio = input('Describe yourself: ')
document.add_paragraph(user_bio)
# section: skills
document.add_heading('Skills')
# Keep asking for skills until the user answers 'n'.
while True:
    has_skill = input('Do you have skill: (y/n) ')
    if has_skill.lower() == 'y':
        # NOTE(review): the body of this branch (and the rest of the loop)
        # is truncated in this excerpt.
# python-docx feature demo: title, styled runs, headings, quote, lists,
# an embedded picture, and the start of a recordset for a table.
from docx import Document
from docx.shared import Inches
document = Document()
document.add_heading('Document Title', 0)
# Paragraph with mixed bold/italic runs.
p = document.add_paragraph('A plain paragraph having some ')
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True
document.add_heading('Heading, level 1', level=1)
document.add_paragraph('Intense quote', style='Intense Quote')
document.add_paragraph('first item in unordered list', style='List Bullet')
document.add_paragraph('first item in ordered list', style='List Number')
# NOTE(review): absolute local path — this will fail on any other machine.
document.add_picture('C:\\Users\\soumi\\Pictures\\application.jpg',
                     width=Inches(1.25))
# Sample rows, presumably destined for a table (literal is truncated in
# this excerpt).
recordset = [{
    "id": 1,
    "qty": 2,
    "desc": "New item"
}, {
    "id": 2,
    "qty": 2,
    "desc": "New item"
def writeDoc(self, Client, table_issuesla, table_plevelsla, tklist):
    """Build the monthly Delivery of Service report for *Client* and save
    it to ./new/<Client>.docx.

    Parameters:
        Client: customer name; used to filter contacts, in report text,
            and for the output/graph file names.
        table_issuesla: DataFrame of SLA results by issue/sub-issue type.
        table_plevelsla: DataFrame of SLA results by priority level.
        tklist: ticket data passed through to the stats/graph helpers.
    """
    doc = Document()
    # Base font for the whole document.
    style = doc.styles['Normal']
    font = style.font
    font.name = 'Calibri'
    font.size = Pt(12)
    # Cover page image.
    doc.add_picture("report_coverImg.png", width=Cm(16), height=Cm(24))
    # ---- General overview ----
    doc.add_heading('GENERAL REPORT OVERVIEW', 1)
    para_01 = "The Delivery of Service document is a monthly report prepared by Tickbox presenting summary data for " + Client + ". This activity is from the period of " + self.reportStart + " -- " + self.reportEnd + "."
    p = doc.add_paragraph(para_01)
    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    p = doc.add_paragraph(
        'For questions or feedback regarding this monthly report, please contact [email protected]'
    )
    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    p = doc.add_paragraph('')
    # ---- Document control table ----
    doc.add_heading('DOCUMENT CONTROL', 1)
    table = doc.add_table(rows=6, cols=2)
    table.style = 'Light Shading Accent 1'
    table.cell(0, 0).text = "File Name:"
    table.cell(0, 1).text = self.title
    table.cell(1, 0).text = "Version:"
    table.cell(1, 1).text = self.VERSION
    table.cell(2, 0).text = "Status:"
    table.cell(2, 1).text = self.STATUS
    table.cell(3, 0).text = "Release Date:"
    table.cell(3, 1).text = self.releaseDate
    # BUG FIX: label previously read "Delive From:".
    table.cell(4, 0).text = "Deliver From:"
    table.cell(4, 1).text = self.DELIVER
    table.cell(5, 0).text = "Report Manager:"
    table.cell(5, 1).text = self.DEV_MANAGER
    p = doc.add_paragraph('')
    # ---- Emergency contacts for this client ----
    doc.add_heading('EMERGENCY SMS KEY CONTACT LIST', 1)
    contactlist = pd.read_csv("ClientContact.csv", encoding='cp1252')
    shortlist = contactlist[(contactlist['CUSTOMER'] == Client)]
    row = len(shortlist)
    smstable = doc.add_table(rows=row + 1, cols=3)
    smstable.style = 'Light Shading Accent 1'
    smstable.cell(0, 0).text = "Contact Name"
    smstable.cell(0, 1).text = "Telephone"
    smstable.cell(0, 2).text = "Email"
    pos = shortlist.index
    for r in range(row):
        # Name/number may be missing (e.g. NaN) — fall back to 'TBC'
        # rather than aborting the whole report.
        try:
            smstable.cell(r + 1, 0).text = shortlist['SMSName'][pos[r]]
        except Exception:  # narrowed from a bare except
            smstable.cell(r + 1, 0).text = 'TBC'
        try:
            smstable.cell(r + 1, 1).text = shortlist['SMSNumber'][pos[r]]
        except Exception:
            smstable.cell(r + 1, 1).text = 'TBC'
        smstable.cell(r + 1, 2).text = shortlist['EMAIL'][pos[r]]
    doc.add_page_break()
    # ---- Service desk section ----
    doc.add_heading('Tickbox Service Desk', 1)
    doc.add_heading('Service Desk Overview', 2)
    p = doc.add_paragraph(
        'The Tickbox Service Desk is committed to ensuring your requests are responded to and resolved within our SLA targets.'
    )
    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    tk_complete, tk_count = self.countMonthTicket(Client, tklist)
    p = doc.add_paragraph('From ' + self.reportStart + ' to ' +
                          self.reportEnd + ', Tickbox received total of ' +
                          str(tk_count) + ' tickets from ' + Client +
                          ', and completed ' + str(tk_complete) + ' tickets.')
    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    doc.add_heading('High Priority Ticket', 2)
    ptext = "Incidents cause interruptions of business activities and must be solved with certain urgency. The following addresses high priority 1 and priority 2 incident tickets, their cause, resolution, feedback, date of occurrence and duration time."
    p = doc.add_paragraph(ptext)
    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    doc.add_page_break()
    # ---- SLA target definitions ----
    doc.add_heading('High Priority Ticket Response and Resolution', 2)
    table = doc.add_table(rows=3, cols=4)
    table.style = 'Light Shading Accent 1'
    table.cell(0, 0).text = "Priority Level"
    table.cell(0, 1).text = "Interpretation"
    table.cell(0, 2).text = "Response Target"
    table.cell(0, 3).text = "Resolution Target"
    table.cell(1, 0).text = "Priority 1"
    table.cell(2, 0).text = "Priority 2"
    table.cell(
        1, 1
    ).text = "High impact to customers identified as severe incident resulting in an outage."
    table.cell(1, 2).text = "Response Target within 15 minutes"
    table.cell(1, 3).text = "Resolution Target within 4 hours"
    table.cell(
        2, 1
    ).text = "High business impact to customers identified as potential loss or interruption of service."
    table.cell(2, 2).text = "Response Target within 30 minutes"
    table.cell(2, 3).text = "Resolution Target within 8 hours"
    # ---- Ticket classification ----
    doc.add_heading('Classification of New Tickets', 2)
    ptext = "A Tickbox Engineer categorises each ticket based on an Issue and Sub-Issue type to assist the Service Desk in solving the issue and to report back to our clients on those numbers."
    p = doc.add_paragraph(ptext)
    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    doc.add_heading("Issue Type by Ticket Numbers", 3)
    issnum, yissue = self.IssueGraph(Client, tklist)
    subnum, ysub = self.SubIssueGraph(Client, tklist)
    ptext = "The top issue type this month was \"" + str(
        issnum
    ) + "\" request and the top sub issue type was \"" + subnum + "\" request."
    p = doc.add_paragraph(ptext)
    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    # Issue graph (figure is saved to <Client>.png and embedded).
    doc.add_heading("Top Five Issues by Amount of Tickets", 4)
    x = yissue.get_figure()
    filename = Client + ".png"
    x.savefig(filename, bbox_inches='tight')
    doc.add_picture(filename, height=Cm(8))
    doc.add_page_break()
    # BUG FIX: heading previously read "Ticketss".
    doc.add_heading("Top Five SubIssues by Amount of Tickets", 4)
    x = ysub.get_figure()
    filename = Client + ".png"
    x.savefig(filename, bbox_inches='tight')
    doc.add_picture(filename, height=Cm(8))
    doc.add_page_break()
    doc.add_heading("Additional Ticket Information", 3)
    usernum, y = self.TopUsersGraph(Client, tklist)
    ptext = "This month, total " + usernum + " different users logged cases."
    # BUG FIX: ptext was built but never written into the document.
    p = doc.add_paragraph(ptext)
    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    doc.add_heading("Most Active Users", 4)
    x = y.get_figure()
    filename = Client + ".png"
    x.savefig(filename, bbox_inches='tight')
    doc.add_picture(filename, height=Cm(8))
    doc.add_page_break()
    # ---- SLA summary ----
    title = "SLA Met Result"
    doc.add_heading(title, 3)
    # Sum of SLA-met tickets across priority levels.
    sla_mettotal = 0
    for i in range(table_plevelsla.shape[0]):
        sla_mettotal = sla_mettotal + int(
            table_plevelsla['Actual SLA Met Tickets'][i])
    sla_total = int(table_plevelsla['Total Tickets'].sum())
    sla_actual = sla_mettotal / sla_total
    sla_p = "{:.1f}%".format(sla_actual * 100)
    ptext = "This month," + str(int(sla_mettotal)) + " out of " + str(
        int(sla_total)
    ) + " tickets met SLA standard, and the SLA met percentage is: " + str(
        sla_p) + "."
    p = doc.add_paragraph(ptext)
    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    # ---- SLA result table by issue/sub-issue ----
    title = "Table: SLA Met Result by Issue and SubIssue Types"
    doc.add_heading(title, 3)
    tb = doc.add_table(table_issuesla.shape[0] + 1, table_issuesla.shape[1])
    tb.style = 'Table Grid'
    for j in range(table_issuesla.shape[-1]):
        tb.cell(0, j).text = table_issuesla.columns[j]
    for i in range(table_issuesla.shape[0]):
        for j in range(table_issuesla.shape[-1]):
            tb.cell(i + 1, j).text = str(table_issuesla.values[i, j])
    # Failed-SLA ticket list, only if there was at least one failure
    # (added 19-04-2018).
    Count_SLA_Fail = self.sla_fail(Client, tklist)
    if len(Count_SLA_Fail) > 0:
        doc.add_page_break()
        title = "Table: SLA Failed Ticket List"
        doc.add_heading(title, 3)
        tb = doc.add_table(Count_SLA_Fail.shape[0] + 1,
                           Count_SLA_Fail.shape[1])
        tb.style = 'Table Grid'
        for j in range(Count_SLA_Fail.shape[-1]):
            tb.cell(0, j).text = Count_SLA_Fail.columns[j]
        for i in range(Count_SLA_Fail.shape[0]):
            for j in range(Count_SLA_Fail.shape[-1]):
                tb.cell(i + 1, j).text = str(Count_SLA_Fail.values[i, j])
    doc.add_page_break()
    doc.add_picture("report_endImg.jpg", height=Cm(24))
    # Renamed local (was `file`, which shadows the builtin).
    out_path = "./new/" + Client + ".docx"
    doc.save(out_path)
def main():
    """Fill TMR LiDAR metadata templates: substitutes #placeholder# keys
    from a variable file into a Word template (table cells) and into one
    XML metadata file per delivered product."""
    print("Starting Program \n\n")
    freeze_support()  # multiprocessing support for frozen executables
    # Parse command-line arguments.
    args = param_parser()
    template_XML = args.xml
    template_WD = args.wd
    variable_file = args.variable_file
    outputdir = args.outputpath
    flownyear = args.flownyear
    # Product UIDs for the four deliverables.
    fid1 = args.fid1
    fid2 = args.fid2
    fid3 = args.fid3
    fid4 = args.fid4
    # Read "key: value" lines into the substitution map; keys are expected
    # to look like '#areaname#'.
    myvars = {}
    with open(variable_file) as varfile:
        for line in varfile:
            var, val = line.partition(":")[::2]
            myvars[var.strip()] = val.strip()
    # Describe the four deliverable products; each gets its own XML file.
    products = {}
    products['dem_asc'] = {}
    products['dem_asc']['uid'] = fid1
    products['dem_asc']['surface_type'] = 'DEM'
    products['dem_asc']['product_type'] = 'GRID'
    products['dem_asc']['format'] = 'ASCII'
    products['dem_asc']['xml'] = os.path.join(
        outputdir,
        "{0}_{2}_z{1}_DEM_GRID_1_ASCII.xml".format(myvars['#areaname#'],
                                                   myvars['#zone#'],
                                                   flownyear))
    products['dem_asc'][
        'limitations'] = 'DEM accuracy will be limited by the spatial accuracy of the LiDAR point data and will contain some additional error due to interpolation, particularly in areas of dense vegetation where ground points are sparse. There may also be some minor error due to ground point misclassification.'
    products['dem_xyz'] = {}
    products['dem_xyz']['uid'] = fid2
    products['dem_xyz']['surface_type'] = 'DEM'
    products['dem_xyz']['product_type'] = 'GRID'
    products['dem_xyz']['format'] = 'TEXT'
    products['dem_xyz']['xml'] = os.path.join(
        outputdir,
        "{0}_{2}_z{1}_DEM_GRID_1_TEXT.xml".format(myvars['#areaname#'],
                                                  myvars['#zone#'],
                                                  flownyear))
    products['dem_xyz'][
        'limitations'] = 'DEM accuracy will be limited by the spatial accuracy of the LiDAR point data and will contain some additional error due to interpolation, particularly in areas of dense vegetation where ground points are sparse. There may also be some minor error due to ground point misclassification.'
    products['int'] = {}
    products['int']['uid'] = fid3
    products['int']['surface_type'] = 'INT-First'
    products['int']['product_type'] = 'Other'
    products['int']['format'] = 'TIFF'
    products['int']['xml'] = os.path.join(
        outputdir,
        "{0}_{2}_z{1}_INT-First_Other_1_TIFF.xml".format(
            myvars['#areaname#'], myvars['#zone#'], flownyear))
    products['int'][
        'limitations'] = 'The intensity image accuracy will be limited by the spatial accuracy of the LiDAR point data.'
    products['las_ahd'] = {}
    products['las_ahd']['uid'] = fid4
    products['las_ahd']['surface_type'] = 'LiDAR-AHD'
    products['las_ahd']['product_type'] = 'MassPoints'
    products['las_ahd']['format'] = 'LAS'
    products['las_ahd']['xml'] = os.path.join(
        outputdir,
        "{0}_{2}_z{1}_LiDAR-AHD_MassPoints_1_LAS.xml".format(
            myvars['#areaname#'], myvars['#zone#'], flownyear))
    products['las_ahd'][
        'limitations'] = 'The workflow and quality assurance processes were designed to achieve the Level 2 requirement for removal of significant anomalies which remain in the ground class (2), vegetation classes (3, 4, 5), buildings and structures (6), water (9), and bridges (10), and achieve a ground point misclassification rate of 2% or less. The classification accuracy was not measured.'
    print(myvars['#areaname#'])
    # Word template: replace placeholders inside every table cell paragraph.
    word_Doc = os.path.join(
        outputdir, "TMR_Metadata_{0}.docx".format(myvars['#areaname#']))
    document = Document(template_WD)
    tables = document.tables
    for table in tables:
        for row in table.rows:
            for cell in row.cells:
                for para in cell.paragraphs:
                    for key, val in myvars.items():
                        if key in para.text:
                            # NOTE(review): assigning para.text discards any
                            # run-level formatting within the paragraph.
                            para.text = para.text.replace(key, val)
    document.save(word_Doc)
    # XML template: apply the shared substitutions once up front...
    with open(template_XML, encoding='latin-1') as myasciif:
        data = myasciif.read()
    for key, val in myvars.items():
        if key in data:
            print(key, val)
            data = data.replace(key, val)
    # ...then per-product fields into a fresh copy for each product.
    for product, params in products.items():
        testdata = data
        print(product)
        xmlfile = products[product]['xml']
        limitations = products[product]['limitations']
        uid = products[product]['uid']
        testdata = testdata.replace('#uid#', uid)
        testdata = testdata.replace('#limitations#', limitations)
        # Also substitute every product attribute as '#<key>#'.
        for key1, value1 in params.items():
            testdata = testdata.replace('#{0}#'.format(key1), value1)
        print(xmlfile)
        # Write as latin-1 bytes to match the template's encoding.
        with open(xmlfile, 'wb') as f:
            testdata = testdata.encode(encoding='latin-1', errors='strict')
            f.write(testdata)
        xmlfile = ''
    return
def _labeled_paragraph(doc, label, text=None):
    """Append a paragraph to *doc* whose leading label run is bold.

    When *text* is given it is appended as a plain (non-bold) run in the
    same paragraph. Returns the new paragraph. Label spacing is preserved
    exactly as supplied by the caller (some labels intentionally carry a
    trailing space, some do not).
    """
    para = doc.add_paragraph('')
    para.add_run(label).bold = True
    if text is not None:
        para.add_run(text)
    return para


def ticket_gen(input_file):
    """Generate one ticket .docx per data row of the Excel file *input_file*.

    Reads the active sheet (first 11 columns), skips the 'Summary' header
    row, and stops at the first blank row (openpyxl yields None, which
    str() turns into the literal 'None'). Each remaining row becomes a
    ticket document named after its last column.

    Relies on module-level `load_workbook`, `Document` (python-docx) and
    `sw_build`.
    """
    # Excel sheet containing the ticket details, one ticket per row.
    sheet = load_workbook(str(input_file)).active
    for row in sheet.iter_rows(max_col=11, values_only=True):
        # Stringify every cell up front so formatting below is uniform.
        row_data = [str(value) for value in row]
        doc_name = row_data[-1]
        if row_data[0] == 'Summary':
            # Header row of the sheet — nothing to generate.
            continue
        if row_data[0] == 'None':
            # First empty row (None stringified) marks the end of the data.
            break
        # Fresh document with the default python-docx template.
        ticket = Document()
        _labeled_paragraph(ticket, '[Summary] ', row_data[0])
        _labeled_paragraph(ticket, '[Precondition]')
        _labeled_paragraph(ticket, 'Testing Type: ', row_data[1])
        _labeled_paragraph(ticket, 'Connected Devices ', row_data[2])
        ticket.add_paragraph(row_data[3])
        _labeled_paragraph(ticket, '[Test Steps] ')
        ticket.add_paragraph(row_data[4])
        _labeled_paragraph(ticket, '[Expected Result] ')
        ticket.add_paragraph(row_data[5])
        _labeled_paragraph(ticket, '[Actual Result] ')
        ticket.add_paragraph(row_data[6])
        _labeled_paragraph(ticket, '[Reproduced Rate] ')
        ticket.add_paragraph('10/10')  # fixed rate for every generated ticket
        _labeled_paragraph(ticket, '[Spec Reference]')
        ticket.add_paragraph(row_data[7])
        _labeled_paragraph(ticket, '[Occurrence Time] ', row_data[8])
        _labeled_paragraph(ticket, '[Comments]')
        ticket.add_paragraph(row_data[9])
        _labeled_paragraph(ticket, '[Hardware Info]')
        _labeled_paragraph(ticket, 'CSM: ', 'GB MY22 CSM3.7 PV01H, High, NA')
        _labeled_paragraph(ticket, '[Found by running test case]')
        ticket.add_paragraph(row_data[10])
        # sw_build is expected to be defined at module level — TODO confirm.
        _labeled_paragraph(ticket, 'SW build info: ', sw_build)
        _labeled_paragraph(ticket, 'Contact Phone Number: ', '+886 966603203')
        _labeled_paragraph(ticket, 'Submitter: ', '<<<*****@*****.**>>')
        ticket.save('{}.docx'.format(doc_name))
from docx import Document

# Post-process table 4 of the lab report: for each of the three measurement
# columns compute the average voltage (Ucp), the corrected value (U) and the
# relative error in percent (delta), writing each back into the table.
wordDoc = Document('БурковМП ГарцевЕА ЛитвиновКЛ 7301 лаб 2.docx')
Up = []
Ucp = []
U = []
delta = []
kf = [1.11, 1, 1.15]  # per-column correction factors
table = wordDoc.tables[4]
for i, factor in enumerate(kf):
    column = table.columns[i + 1]  # column 0 holds the row labels
    measured_text = column.cells[2].text
    print(measured_text)
    measured = float(measured_text)
    Up.append(measured)
    average = measured / 1.11
    Ucp.append(average)
    column.cells[3].text = str(average)
    corrected = factor * average
    U.append(corrected)
    column.cells[4].text = str(corrected)
    error_pct = 100 * (measured - corrected) / corrected
    delta.append(error_pct)
    column.cells[5].text = str(error_pct)
print(Ucp)
print(U)
print(delta)
wordDoc.save('БурковМП ГарцевЕА ЛитвиновКЛ 7301 лаб 2.docx')
Settings
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

from docx import Document
# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
doc = Document(docx_file)
settings = doc.settings
# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
element  # The lxml element proxied by this object
root2 = tree2.getroot() #tree3 = parse('./script/Virus_Check.xml') #root3 = tree3.getroot() hostname = root1.find("HOSTNAME").text start_time = root1.find("START_TIME").text OS_name = root1.find("OS_NAME").text OS_version = root1.find("OS_VERSION").text OS_info = OS_name.strip() + ' - ' + OS_version.strip() Vaccine = root2.find("Vaccine").text #양식 로드 document = Document('./script/대국민 PC 원격 보안점검 결과보고서 양식.docx') #본문 로드 para = document.paragraphs #시작 시간 run = para[3].add_run(start_time) font1 = run.font font1.size = Pt(10) #종료 시간 dt = datetime.datetime.now() end_time = str(dt.year) + "-" + str(dt.month).zfill(2) + "-" + str( dt.day).zfill(2) + " " + str(dt.hour).zfill(2) + ":" + str( dt.minute).zfill(2) + ":" + str(dt.second).zfill(2) run = para[4].add_run(end_time) font1 = run.font font1.size = Pt(10)
import matplotlib #import umap ##only necessary for weird experiments import matplotlib.pyplot as plt #import seaborn as sns ##only necessary for weird experiments from mpl_toolkits.mplot3d import proj3d import matplotlib.cm as cm from torch.nn import CosineSimilarity from sty import fg, bg, ef, rs, RgbFg from sklearn.preprocessing import MinMaxScaler import syntok.segmenter as segmenter from ansi2html import Ansi2HTMLConverter import text_graph conv = Ansi2HTMLConverter() document = Document() ## Create a python-docx document cos = CosineSimilarity(dim=1, eps=1e-6) class bcolors: #For converting the ANSI string to HTML - Sty is not supported well :( HIGHLIGHT = '\33[43m' END = '\033[0m' granularity_level = "Sent" #"Word" "Sent" "Paragraph" dynamic = False ##Controls if we highlight the more important words more or not graph = False ###ONLY WORKS WITH "Word" granularity_level word_doc = True html = True word_window_size = 10 ##This is really doubled since it's Bi-directional. Only matters for word level granularity highlight_color_intensity = 175 # try values between 25 and 200
def createTopicTable(patentList, docFileName):
    """Build a landscape-oriented topic table for *patentList* and save it
    to *docFileName*.

    The table has a single header row and one column per patent plus a
    leading label column; `fillInTopicData` populates the contents.
    """
    doc = Document()
    # Flip every section to landscape so the wide table fits on the page.
    for sec in doc.sections:
        sec.orientation = WD_ORIENT.LANDSCAPE
    topic_table = doc.add_table(rows=1, cols=len(patentList) + 1)
    fillInTopicData(topic_table, patentList)
    doc.save(docFileName)