def main(argv):
    """Build a document listing every endnote, prefixed by the words before its reference.

    Args:
        argv: Command-line arguments without the program name.

    The result is saved to ``--output``; when that option is omitted the name
    is the input name with a trailing ``.docx`` removed plus
    ``-endnotes.docx``. The process exits with status 1 if a reference cannot
    be found in the paragraph it is supposed to come from.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("document", help="Document to load")
    parser.add_argument("-n", type=int, default=10,
                        help="Number of previous words to include")
    parser.add_argument("-o", "--output", default="", help="Output file")
    parser.add_argument("--chapter", type=int,
                        help="Load only refs from chapter")
    parser.add_argument("--sort", action="store_true")
    args = parser.parse_args(argv)

    if args.output:
        output_filename = args.output
    else:
        # str.rstrip(".docx") removes any trailing '.', 'd', 'o', 'c', 'x'
        # characters (e.g. "doc.docx" -> ""), so cut the suffix explicitly.
        stem = args.document
        if stem.endswith(".docx"):
            stem = stem[:-len(".docx")]
        output_filename = stem + "-endnotes.docx"

    source = Document(args.document)
    notes, note_ids = get_notes(source)
    pars = get_paragraphs(source, args.chapter)

    ids = set()
    for run, _ in pars:
        ids.update(ref.id for ref in run.endnote_references)
    # This is definitely doing something; there are only a few that don't match up
    assert ids.issubset(note_ids)

    output = Document()
    if args.sort:
        pars = sorted(pars, key=ref_to_note(notes))

    for ref, p in pars:
        note_id = ref.endnote_references[0].id
        runs = p.runs
        ix = ref_to_run_ix(note_id, runs)
        if ix == -1:
            print(f"Error: reference {note_id} not found in following paragraph:")
            print(p.text)
            sys.exit(1)

        pre_text = get_previous_words(ix, runs, args.n)
        # The quoted lead-in is written once, in front of the first note
        # paragraph that has text. A flag tracks this so pre_text itself stays
        # intact for the diagnostic output below.
        prefix_pending = len(pre_text) > 0
        for note_par in notes[note_id].paragraphs:
            new_par = output.add_paragraph()
            if prefix_pending and len(note_par.text.strip()) > 0:
                lead_in = new_par.add_run()
                lead_in.add_text("“" + pre_text + "”")
                lead_in.bold = True
                prefix_pending = False
            for run in note_par.runs:
                copied = new_par.add_run()
                copied.add_text(run.text)
                copy_run_style(run, copied, keep=["italic"])

        note_text = "".join(par.text for par in notes[note_id].paragraphs)
        print(f"Note id: {note_id}")
        print(f"Note text: {note_text}")
        print(f"Source paragraph: {pre_text}")
        print("")

    output.save(output_filename)
import os
import sys

from docx import Document
from docx.shared import Inches

# Optional first command-line argument: name of the .docx to produce.
args = sys.argv
docxname = args[1] if len(args) != 1 else 'test.docx'

# Part names and the zone numbers that go with them.
sosudPartsBot = ['BOT-1', 'BOT-1-2', 'BOT-1-3', 'BOT-2', 'BOT-2-2', 'BOT-2-3']
sosudParts = ['OBECH', 'UB+OB', 'UB-FAST', 'USIL']
zonesBot = [4, 6, 7, 4, 6, 7]
zones = [2, 1, 3, 5]

doc = Document()
# View number -> caption suffix.
views = {1: 'Слева', 2: 'Справа', 3: 'Вид снизу'}
ramaPic = 'ramaMises'

# First the frame stress pictures, then the strain pictures; each picture is
# followed by its caption, for views 1 and 2.
for _stem, _caption in (
        (ramaPic, 'Рисунок - Эффективные напряжения по Мизесу. {}'),
        ('emax',
         'Рисунок - Максимальная деформация растяжения в сосуде их ПКМ. {}'),
):
    for v in (1, 2):
        doc.add_picture('{}-{}.png'.format(_stem, v), width=Inches(6))
        doc.add_paragraph(_caption.format(views[v]))

# Picture file name for every bottom part in each of the three views.
for i, p in enumerate(sosudPartsBot):
    for v in (1, 2, 3):
        fname = '{}-{}.png'.format(p, v)
import os

import docx
from docx import Document
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_UNDERLINE, WD_TAB_ALIGNMENT, WD_TAB_LEADER
from docx.oxml.ns import qn
from docx.shared import RGBColor, Inches, Pt, Cm

# Two documents are opened; only the first one is modified below.
doc, doc1 = Document("12.docx"), Document("13.docx")

# Disabled exploration: dump the styles of both documents.
# for style1 in doc1.styles:
#     print(style1)
# print(len(doc.styles))
# for style2 in doc.styles:
#     print(style2)
t = doc1.styles
# par_styles = [s for s in t if s.type == WD_STYLE_TYPE.PARAGRAPH]
# for style in par_styles:
#     print(style.name)

# Register a new character style on the first document and show its name.
d = doc.styles
d1 = d.add_style("yangshi", WD_STYLE_TYPE.CHARACTER)
print(d1.name)

# Print the text of every paragraph in the first document.
for para in doc.paragraphs:
    print(para.text)

# Disabled exploration: paragraph spacing.
# s = doc1.paragraphs[0]
# s1 = doc.paragraphs[0]
# s.paragraph_format.space_after = Inches(0.5)
def convert(self, input, output, type):
    """Convert a document into a flat list of TEXT(...) / NEWPARA() items.

    Args:
        input: Path of the document to convert.
        output: Not used by this method.
        type: Source format. 'iconDOCX' files are opened directly; any other
            value is first converted to docx with the ``textutil`` tool.

    Returns:
        A list in which every non-blank paragraph contributes one TEXT(...)
        per run (runs starting with 'EMBED' are dropped) followed by one
        NEWPARA().
    """
    if type == 'iconDOCX':
        print('Opening {}'.format(input))
        docx = Document(input)
    else:
        tmp = './tmp-{}'.format(input.replace('comlaw/', ''))
        print('converting from {} {}'.format(type, input))
        try:
            # Run textutil without a shell: the file name is passed as a plain
            # argument and stdout is redirected by us, so characters in the
            # name cannot be interpreted as shell syntax.
            with open(tmp, 'wb') as tmp_file:
                subprocess.check_call(
                    ['textutil', '-convert', 'docx', '-stdout',
                     './{}'.format(input)],
                    stdout=tmp_file)
            docx = Document(tmp)
        finally:
            # Remove the temporary file even when conversion or loading fails.
            if os.path.exists(tmp):
                os.remove(tmp)

    # Word field codes that leak into run text and must be stripped.
    remove = r'(DOCPROPERTY ([a-zA-Z]+)|TOC \\o|PAGEREF _[a-zA-Z0-9]+ \\h [\d]+|STYLEREF [a-zA-Z0-9]+)'

    out = []
    for p in docx.paragraphs:
        if not p.text.strip():
            continue

        # Paragraph font size: own style, then base style, then 11pt.
        if p.style.font and p.style.font.size:
            p_size = p.style.font.size.pt
        elif p.style.base_style and p.style.base_style.font and p.style.base_style.font.size:
            p_size = p.style.base_style.font.size.pt
        else:
            p_size = 11

        _indent = para_indent(p)
        indent = int(_indent / p_size) if _indent > 0 else 0

        runs = p.runs
        sizes = collections.defaultdict(int)
        for r in runs:
            sizes[r.font.size.pt if r.font.size is not None else p_size] += 1
        # Font size is the most common of the header; fall back to the
        # paragraph size when the paragraph exposes no runs at all.
        if sizes:
            dominant_size = max(sizes.items(), key=lambda k: k[1])[0]
        else:
            dominant_size = p_size
        font_size = pts_to_header(dominant_size)

        for r in runs:
            text = r.text
            for frm, to in code_map:
                text = text.replace(frm, to)
            text = text.encode('ascii', 'replace')
            # Remove EMBED WORD PICTURE
            if text.strip().startswith('EMBED'):
                continue
            text = re.sub(remove, '', text)
            # Bold/italic flags are only kept for body text (font_size == 0).
            out.append(
                TEXT(1 if r.bold is True and font_size == 0 else 0,
                     1 if r.italic is True and font_size == 0 else 0,
                     indent, font_size, text))
        # Paragraph
        out.append(NEWPARA())
    return out
import os
import re
import xlwt
import time
from docx import Document

# Folder that holds the .docx files to process.
path = "C:\\Users\\songjie\\Desktop\\qwer"
# Prints the number of files in every directory visited by os.walk.
# NOTE(review): `files` is reused after this loop, where it still holds the
# LAST (dirpath, dirnames, filenames) tuple produced by os.walk; if `path`
# has sub-folders that is not the top-level folder - confirm this is intended.
for files in os.walk(path):
    # print(files[2])
    print(len(files[2]))
# Row offset added to every sheet row index; it is not changed in the visible code.
used = 0
# for a in range(len(files[2])):\
wr = xlwt.Workbook()
sheet1 = wr.add_sheet('Sheet1', cell_overwrite_ok=True)
for q in range(0, len(files[2])):
    doc = Document("C:\\Users\\songjie\\Desktop\\qwer\\%s" % files[2][q])
    print("processing:%s" % files[2][q])
    print(files[2][q])
    # Walk the first table of the document, skipping its first row.
    for x in range(1, len(doc.tables[0].rows)):
        print(x)
        # The two timestamps bracket the row lookup.
        print("1 %s" % time.time())
        a = doc.tables[0].rows[x]
        print("2 %s" % time.time())
        # Shift to a 0-based index so the first data row lands on sheet row `used`.
        x -= 1
        name = a.cells[1].paragraphs[0].text
        sheet1.write(x + used, 0, name)
        # First run of digits in the file name is written as the year.
        # NOTE(review): '\d' in a non-raw string is an invalid escape
        # sequence; a raw string would avoid the warning. temp[0] raises
        # IndexError when the file name contains no digits.
        temp = re.findall('(\d+)', files[2][q])
        year = temp[0]
        sheet1.write(x + used, 3, year)
        certificate_number = a.cells[2].paragraphs[0].text
def _record_check(response_message, section, passed, ok_message, fail_message):
    """Store the outcome of one check under ``response_message[section]``."""
    response_message[section]["message"] = ok_message if passed else fail_message
    response_message[section]["status"] = bool(passed)


def lab_1_check_answer(student_path, correct_path):
    """Compare a student's lab-1 document with the reference document.

    Args:
        student_path: Path of the student's .docx file.
        correct_path: Path of the reference .docx file.

    Returns:
        A dict with an "errors" list plus one dict per check
        ("custom_header_style", "custom_main_style", "document_margins",
        "document_header", "footnote", "table_of_contents",
        "document_numbering", "headers_style"). Every completed check holds a
        "message" and a boolean "status". If anything fails while the
        documents are opened or compared, a single message is appended to
        "errors" and the checks not yet reached stay empty.
    """
    response_message = {"errors": []}
    for section in ("custom_header_style", "custom_main_style",
                    "document_margins", "document_header", "footnote",
                    "table_of_contents", "document_numbering",
                    "headers_style"):
        response_message[section] = {}

    try:
        # document_path = 'data/internet_tables.docx'
        document_student = Document(student_path)
        document_correct = Document(correct_path)
        header_style_name = "Заголовок_ФИО"
        main_style_name = "Основной_ФИО"

        _record_check(
            response_message, "table_of_contents",
            is_table_of_contents(document_student),
            "Оглавление создано", "Оглавление не создано")

        _record_check(
            response_message, "document_numbering",
            is_document_numbering(document_student),
            "Страницы пронумерованы", "Страницы не пронумерованы")

        # The same style-extraction helper is used for the main and the
        # header style; only the style name differs.
        main_style_student = get_custom_header_style(document_student,
                                                     main_style_name)
        main_style_correct = get_custom_header_style(document_correct,
                                                     main_style_name)
        _record_check(
            response_message, "custom_main_style",
            main_style_student == main_style_correct,
            "Основной стиль создан верно", "Основной стиль создан неверно")

        header_style_student = get_custom_header_style(document_student,
                                                       header_style_name)
        header_style_correct = get_custom_header_style(document_correct,
                                                       header_style_name)
        _record_check(
            response_message, "custom_header_style",
            header_style_student == header_style_correct,
            "Стиль заголовков создан верно", "Стиль заголовков создан неверно")

        # Footnotes are read from the files themselves, not the loaded documents.
        _record_check(
            response_message, "footnote",
            get_footnotes(student_path) == get_footnotes(correct_path),
            "Сноска создана верно", "Сноска создана неверно")

        _record_check(
            response_message, "headers_style",
            get_docement_headers(document_student, header_style_name)
            == get_docement_headers(document_correct, header_style_name),
            "Стиль к заголовкам применен верно",
            "Стиль к заголовкам применен неверно")

        _record_check(
            response_message, "document_header",
            get_document_header(document_student)
            == get_document_header(document_correct),
            "Верхний колонтитул применен верно",
            "Верхний колонтитул применен неверно")

        _record_check(
            response_message, "document_margins",
            get_document_margins(document_student)
            == get_document_margins(document_correct),
            "Отступы применены верно", "Отступы применены неверно")
    except Exception:
        # Any failure is reported to the caller as a file-opening error.
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        response_message["errors"].append('Ошибка при открытии файла')
    return response_message
def export(self):
    """Write the figures held in ``self.results`` to a Word report.

    Shows an error box and does nothing when no input file was chosen.
    Otherwise asks for a target folder, builds a heading, a four-column table
    (one row per entry of the fixed account list) and two summary paragraphs,
    and saves ``<company>.docx`` in the chosen folder. If no folder was
    chosen, the file goes to the current working directory and an
    information box says so.
    """
    if self.filename == '':
        tkinter.messagebox.showerror('错误', '未选择文件!')
        return

    rows_order = ['流动资产合计', '非流动资产合计', '资产总计', '流动负债合计',
                  '非流动负债合计', '负债合计', '实收资本', '所有者权益合计',
                  '营业收入', '营业成本', '利润总额', '净利润']
    self.chooseFolder()

    # Earlier template-based prototype, kept for reference:
    # doc = Document('老板希望的导出效果.docx')
    # table = doc.tables[0]
    # for row in table.rows[1:]:
    #     key = row.cells[0].text
    #     row.cells[1].text = str(self.results[key]['end'])
    #     row.cells[2].text = str(self.results[key]['start'])
    # para = doc.paragraphs
    # for p in para:
    #     for r in p.runs:
    #         if r.underline:
    #             print(r.underline)
    #         else:
    #             print(r.text)
    # doc.save('test.docx')

    def lookup(key, fallback):
        """Return self.results[key], or fallback when the key is missing."""
        try:
            return self.results[key]
        except KeyError:
            return fallback

    doc = Document()
    normal = doc.styles['Normal']
    normal.font.name = '宋体'
    normal._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
    doc.add_heading(self.results['company'], 0)

    table = doc.add_table(rows=1, cols=4, style='Table Grid')
    hdr_cells = table.rows[0].cells
    for column, title in enumerate(('科目', '年初(万元)', '期末(万元)', '变动(%)')):
        hdr_cells[column].text = title

    for item in rows_order:
        row_cells = table.add_row().cells
        row_cells[0].text = item
        # The data may be incomplete, so a KeyError for this item is possible;
        # it is caught here and the row is left blank instead of aborting.
        try:
            entry = self.results[item]
            values = [str(entry['start']), str(entry['end']), str(entry['change'])]
        except KeyError:
            values = ['', '', '']
        for column, value in enumerate(values, start=1):
            row_cells[column].text = value

    ts = lookup('资产总计', {'start': ' ', 'end': ' ', 'change': ' '})
    td = lookup('负债合计', {'start': ' ', 'end': ' ', 'change': ''})
    oi = lookup('营业收入', {'start': ' ', 'end': ' ', 'change': ' '})
    net_profit = lookup('净利润', {'start': ' ', 'end': ' ', 'change': ' '})

    summary = '截至报告期末,项目公司总资产{0}万元,相对上期增长{1}%;总负债{2}万元,相对上期增长{3}%。收入和利润方面,报告期末公司营业收入{4}万元,相对上期增长{5}%;净利润{6}万元,相对上期增长{7}%。'
    doc.add_paragraph(summary.format(
        ts['end'], ts['change'], td['end'], td['change'],
        oi['end'], oi['change'], net_profit['end'], net_profit['change']))
    doc.add_paragraph('通过上述信息,公司各项财务指标(有/无)异常,报告期内(有/无)明显变化。总体来看,公司财务状况()。')

    if self.foldername != '':
        doc.save(os.path.join(self.foldername, '{}.docx'.format(self.results['company'])))
        return
    doc.save(os.path.join(os.getcwd(), '{}.docx'.format(self.results['company'])))
    tkinter.messagebox.showinfo('提示', '未选择文件存放位置,默认存放于当前文件夹!')
# coding: utf8 #есть смысл переделать под пи3 from docx import Document document = Document('1.docx') fileProblems = open('bad.txt', 'w') filedoctype = open('doctype.txt', 'w') fileautors = open('autors.txt', 'w') fileautorsUkazatel = open('autorsUkazatel.txt', 'w') filelastautorsUkazatel = open('lastautorsUkazatel.txt', 'w') intNumOfZapis = 0 intColOfDonFindZapis = 0 testingString = "" dataOfAllFindUniqKeysOfAll = {} listOfKeyWords = [] listOfAuthors = [] nonSortedSlovarOfAUtors = {} flagIsAllZapisRegonuzeBuType = False flagIsAllZapisRegonuzeBuAutorsField = False flagIsPresenBetterKeyForRedactors = False class Zapis: """обьект записи """ def CheckRazdeliteli(): """описывает правила для разделителей""" pass def CheckTypeOfDoc(): """описывает типы док-та""" pass
def given_a_run(context):
    """Put a new, empty run of a new paragraph in a blank document on ``context.run``."""
    context.run = Document().add_paragraph().add_run()
def __init__(self):
    """Create the instance around a newly created ``Document()``."""
    blank_document = Document()
    self.document = blank_document
def convert_in_word(request):
    """Render the uploaded certificate XML as a .docx and return its static URL.

    The XML is taken from ``request.FILES['file']``. The document gets an
    exporter/consignee table, a nested origin/destination table, one row per
    ``Casella_8`` element and a footer with the customs stamp data. It is
    saved under ``<cwd>/converter/static/files/`` with a timestamp as name,
    and the response body is the static URL of that file.
    """
    document = Document()
    normal_style = document.styles['Normal']
    normal_style.font.name = 'Calibri'
    normal_style.font.size = Pt(10)

    margin = 1.5
    sections = document.sections
    for section in sections:
        for side in ('top_margin', 'bottom_margin', 'left_margin', 'right_margin'):
            setattr(section, side, Cm(margin))
    footer = sections[0].footer

    root = ET.parse(request.FILES.get('file')).getroot()

    def text_at(path, node=root):
        """Text of the first element matching `path` below `node`."""
        return node.find(path).text

    def address_block(party):
        """Name, street, town + postcode and country code of `party`, one per line."""
        name, street, town, postcode, country = [
            text_at("./" + party + "/" + field)
            for field in ("RagioneSociale", "Indirizzo", "Comune", "CAP", "CodPaese")
        ]
        return name + "\n" + street + "\n" + town + " " + postcode + "\n" + country

    def add_party_row(table, party_text, place_text):
        """Append a row: party on the left, right-aligned place on the right."""
        cells = table.add_row().cells
        cells[0].text = party_text
        cells[0].style = normal_style
        par = cells[1].add_paragraph()
        par.text = place_text
        par.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        par.style = normal_style

    exporter_string = address_block("Esportatore_1")
    provenience = text_at("./Origine_4")
    destinatario_string = address_block("Destinatario_3")
    arrival = text_at("./Destinazione_5")

    parties_table = document.add_table(rows=0, cols=2)
    add_party_row(parties_table, exporter_string, provenience)
    add_party_row(parties_table, destinatario_string, arrival)

    # Second table: a single row whose right cell holds a nested 2-column table.
    outer_table = document.add_table(rows=0, cols=2)
    nested_table = outer_table.add_row().cells[1].add_table(rows=0, cols=2)
    nested_cells = nested_table.add_row().cells
    nested_cells[0].text = provenience
    nested_cells[1].text = arrival

    document.add_paragraph('\n' * 10)

    goods_table = document.add_table(rows=0, cols=2)
    for casella_8 in root.findall("./Casella_8_9_10/Casella_8"):
        cells = goods_table.add_row().cells
        cells[0].text = (text_at("./Progressivo", casella_8) + " "
                         + text_at("./TotaleColli", casella_8)
                         + text_at("./CodiceConfezione", casella_8)
                         + text_at("./DescrizioneMerce", casella_8))
        cells[0].style = normal_style
        cells[1].text = text_at("./Progressivo", casella_8)
        cells[1].style = normal_style

    cert_id = text_at("./Cert_ID")
    visto_modello = text_at("./VistoDogana_11/Modello")
    visto_numero = text_at("./VistoDogana_11/Numero")
    luogo = text_at("./VistoDogana_11/Luogo")
    # Only the leading YYYY-MM-DD part of both dates is used.
    raw_data = text_at("./VistoDogana_11/Data")[0:10]
    raw_data2 = text_at("./VistoDogana_11/Del")[0:10]
    data = datetime.strptime(raw_data, "%Y-%m-%d").strftime("%d/%m/%Y")
    data2 = datetime.strptime(raw_data2, "%Y-%m-%d").strftime("%d/%m/%Y")

    footer_string = "".join([
        "Certificato", cert_id, "\n", "Versione 1\n\n\n",
        visto_modello, " ", visto_numero, "\t\t", luogo, " ", data, "\n",
        data2, "\n", "28100 UD PARMA",
    ])
    footer.paragraphs[0].text = footer_string
    # document.add_page_break()

    timestampStr = datetime.now().strftime("%d%b%Y%H%M%S%f")
    document.save(os.getcwd() + '/converter/static/files/' + timestampStr + '.docx')
    return HttpResponse(static('files/' + timestampStr + '.docx'))
import csv
import os

from docx import Document
from docx.shared import Inches

# One document for the answer key, one for the blank quiz.
documentAnswerKey = Document()
documentBlankQuiz = Document()

# Title of each document.
documentAnswerKey.add_heading('CFA Formula Quiz Answer Key', 0)
documentBlankQuiz.add_heading('CFA Formula Quiz', 0)

# Read the guide and write one numbered line per formula into the answer key.
with open("guide.csv") as csvfile:
    for row in csv.DictReader(csvfile):
        NumberRef = row['NumberRef']      # value of the NumberRef column
        FormulaName = row['FormulaName']  # value of the FormulaName column
        question_line = str(NumberRef) + ") " + FormulaName
        documentAnswerKey.add_paragraph(question_line)
def read_file(file_name):
    """Return the text of every paragraph of the document at ``file_name``, in order."""
    return [paragraph.text for paragraph in Document(file_name).paragraphs]
# Remaining command-line options.
parser.add_argument('--dest')
parser.add_argument('--use-braces', nargs=2, default=None)
for flag in ('--remove-empty-braces', '--remove-unreplaced-braces'):
    parser.add_argument(flag, action='store_true')
parser.add_argument('--remove-empty-row', type=int)
args = parser.parse_args()

# Input validity checks: a replacement must come either from the
# old_text/new_text pair or from a JSON spec file.
if not (args.old_text or args.replace_list_file):
    raise Exception(
        'You must specify either old_text and new_text argument or use a json spec file'
    )
if args.old_text and not args.new_text:
    raise Exception('You must specify new_text if you specified old_text',
                    args.old_text, args.new_text)

# Process: the JSON spec, when given, takes precedence over the single pair.
document = Document(args.original_docx_path)
replace_list = (
    readReplaceListFromJson(args.replace_list_file)
    if args.replace_list_file
    else [{"old_text": args.old_text, "new_text": args.new_text}]
)
multiReplace(document, replace_list, vars(args))

# Save: overwrite the original file unless --dest was given.
dest = args.dest if args.dest else args.original_docx_path
document.save(dest)
def read_words(self):
    """Load ``self.file_in`` and collect its space-separated tokens in ``self.words``."""
    paragraphs = Document(self.file_in).paragraphs
    self.words = []
    for paragraph in paragraphs:
        # Splitting on a single space keeps empty tokens for repeated spaces.
        self.words += paragraph.text.split(' ')
def given_a_run_having_bool_prop_set_on(context, bool_prop_name):
    """Store on ``context.run`` a new run whose ``bool_prop_name`` attribute is True."""
    paragraph = Document().add_paragraph()
    new_run = paragraph.add_run()
    setattr(new_run, bool_prop_name, True)
    context.run = new_run
def _add_ranking_bullets(document, subject, labels, nums, limit=10):
    """Add one 'Productive <subject> No. k is X, which has N publications.' bullet per entry, at most `limit`."""
    for i in range(min(limit, len(labels), len(nums))):
        p = document.add_paragraph('Productive ' + subject + ' ', style='ListBullet')
        p.add_run('No. ' + str(i + 1) + ' is ')
        p.add_run(labels[i]).bold = True
        p.add_run(', which has ')
        p.add_run(str(nums[i])).bold = True
        p.add_run(' publications.')


def generate_word_report(journal_name, table_name, info, df, cwd, invalid_lines):
    """Write the analysis report of one journal to ``<cwd>\\<table_name>.docx``.

    Args:
        journal_name: Journal title; used as document title and passed to
            ``general_info``.
        table_name: Base name of the generated pictures and of the report.
        info: Sequence of per-year dicts; the keys read here are 'year',
            'aver_page', 'num_pubs', 'percent_cn_author', 'percent_cn_univ'
            and 'list_keyw'.
        df: Publication records; rows are read with ``df.iloc`` and the keys
            'year', 'authors', 'paddress' and 'title'.
        cwd: Folder in which pictures are looked up and the report is saved.
        invalid_lines: Number of discarded entries, reported in a heading.

    The ranking lists show at most ten entries and simply stop early when
    fewer are available; keyword columns with fewer than ten keywords leave
    the remaining cells empty.
    """
    # Pictures and the report share this prefix (Windows path separator,
    # as in the rest of the project).
    base_path = cwd + '\\' + table_name

    document = Document()
    document.add_heading(journal_name, 0)

    # --- general journal information ------------------
    document.add_heading('Journal Analysis...', level=1)
    journal_info = general_info(journal_name)
    # (label, key in journal_info, whether the value is upper-cased)
    for label, key, upper in (
            ('SCI: ', 'sci', False),
            ('SSCI: ', 'ssci', False),
            ('University Top Journal: ', 'UnivRank', False),
            ('International Business School Top Journal: ', 'IBSRank', False),
            ('ISSN: ', 'issn', False),
            ('Publication Frequency: ', 'freq', True),
            ('Press: ', 'press', True),
            ('City: ', 'city', True),
            ('Country: ', 'country', True),
            ('Address: ', 'address', True),
    ):
        value = journal_info[key]
        document.add_paragraph(label + (value.upper() if upper else value))

    # --- per-year statistics table ------------------
    document.add_heading(
        'Journal Statistics: ({} out of {} entries have been deleted)'.format(
            invalid_lines, invalid_lines + len(df)),
        level=1)
    table = document.add_table(rows=1, cols=5, style="Light List Accent 1")
    column_names = ['Year', 'AveragePage', 'NumPublication', 'ChineseAuthor', 'ChineseUniv']
    heading_cells = table.rows[0].cells
    for i, column_name in enumerate(column_names):
        heading_cells[i].text = column_name
    for item in info:
        cells = table.add_row().cells
        cells[0].text = item['year']
        cells[1].text = str(int(item['aver_page']))
        cells[2].text = str(item['num_pubs'])
        cells[3].text = str(item['percent_cn_author']) + '%'
        cells[4].text = str(item['percent_cn_univ']) + '%'

    # --- keyword table: one column per year ------------------
    document.add_heading('Journal Keywords Distributions - Top 10 most popular Keywords', level=1)
    num_of_keywords = 10
    table = document.add_table(rows=num_of_keywords + 1, cols=len(info), style="Table Grid")
    heading_cells = table.rows[0].cells
    for c, item in enumerate(info):
        heading_cells[c].text = item['year']
        # Row 0 is the header, so keywords start at row 1. Slicing keeps the
        # loop inside the table and tolerates years with fewer keywords.
        for r, keyword in enumerate(item['list_keyw'][:num_of_keywords], start=1):
            run = table.cell(r, c).paragraphs[0].add_run(keyword)
            run.font.size = Pt(8)

    # --- keyword cloud ------------------
    document.add_heading('Journal Keywords Cloud by Years', level=1)
    plot_wordcloud(info, table_name, cwd)
    document.add_picture(base_path + '_wc.jpg', width=Cm(17))
    document.add_page_break()

    # --- country distribution (new page) ------------------
    document.add_heading('Country Distribution', level=1)
    labels, nums = plot_country_distribution(df, table_name, cwd)
    document.add_picture(base_path + '_cd.jpg', width=Cm(15))
    document.add_heading('List of most productive country...', level=2)
    _add_ranking_bullets(document, 'country', labels, nums)
    if 'China' not in labels:
        document.add_heading('There is NOT publication from any Chinese university...', level=2)
    else:
        i = labels.index('China')
        document.add_heading(
            'China is ranked at No. ' + str(i + 1) + ' and has ' + str(nums[i])
            + ' Publications in this journal.',
            level=2)
    document.add_page_break()

    # --- institute distribution (new page) ------------------
    document.add_heading('Institute Distribution', level=1)
    labels, nums = plot_institute_distribution(df, table_name, cwd)
    document.add_picture(base_path + '_id.jpg', width=Cm(15))
    document.add_heading('List of most productive institute...', level=2)
    _add_ranking_bullets(document, 'institute', labels, nums)

    # --- top Chinese universities ------------------
    cn_ins_hist = cal_cn_univ_distribution(df)
    if not cn_ins_hist:
        document.add_heading('We are on the way...', level=2)
    else:
        document.add_heading('List of most productive Chinese institute...', level=2)
        label_list = [c[0] for c in cn_ins_hist]
        num_list = [c[1] for c in cn_ins_hist]
        _add_ranking_bullets(document, 'institute', label_list, num_list)

    # --- publication record of selected universities ------------------
    univs = ['Zhejiang Gongshang Univ', 'Zhejiang Univ Finance & Econ', 'Shanghai Univ Finance & Econ',
             'Jiangxi Univ Finance & Econ', 'Cent Univ Finance & Econ', 'Dongbei Univ Finance & Econ',
             'Southwestern Univ Finance & Econ', 'Univ Int Business & Econ', 'Zhongnan Univ Econ & Law']
    column_names = ['ID', 'Year', 'Author(s)', 'School', 'Title']
    column_widths = [Cm(0.5), Cm(1), Cm(2.5), Cm(2.5), Cm(8)]
    for univ in univs:
        document.add_heading('Publication Record of ' + univ, level=1)
        index = any_univ(df, univ)
        if not index:
            document.add_paragraph(univ + ' has not been contributed to this journal yet...')
            continue
        table = document.add_table(rows=1, cols=5, style="Light List Accent 1")
        #table.autofit=True
        heading_cells = table.rows[0].cells
        for c, width in enumerate(column_widths):
            table.cell(0, c).width = width
        for c, column_name in enumerate(column_names):
            heading_cells[c].text = column_name
        for row_id, df_row in enumerate(index, start=1):
            item = df.iloc[df_row, :]
            cells = table.add_row().cells
            cells[0].text = str(row_id)
            cells[1].text = item['year']
            cells[2].text = item['authors']
            # The school is the second comma-separated part of the address.
            cells[3].text = item['paddress'].split(',')[1].strip()
            cells[4].text = item['title']

    # --- save to doc ------------------
    document.save(base_path + '.docx')
def given_a_run_having_underline_type(context, underline_type):
    """Store on ``context.run`` the fixture run that has the named underline type.

    The 'run-enumerated-props' test document holds one run per underline type
    in its first paragraph, in the order listed below.
    """
    ordered_types = ('inherited', 'no', 'single', 'double')
    index_by_type = {name: position for position, name in enumerate(ordered_types)}
    run_idx = index_by_type[underline_type]
    first_paragraph = Document(test_docx('run-enumerated-props')).paragraphs[0]
    context.run = first_paragraph.runs[run_idx]
""" Created on Thu Sep 19 14:38:30 2019 @author: wb550776 """ import numpy as np import pandas as pd import nltk nltk.download('punkt') # one time execution import re import docx from nltk.tokenize import sent_tokenize #pip uninstall docx #pip install python-docx from docx import Document document = Document() document = Document( 'C:/Users/wb550776/Downloads/Using ML in Evaluative Synthesis May 31 2019.docx' ) #Open the article def read_article(filename): file = open(r'C:/Users/wb550776/Downloads/10144-Ghana-ELAC EvNote.docx', encoding='utf-8') #file2 = open(r'C:/Users/wb550776/Downloads/README.txt') filedata = file.readlines() article = filedata[0].split(". ") sentences = []
def given_a_run_having_style_char_style(context, char_style):
    """Store on ``context.run`` the fixture run that carries the named character style.

    The 'run-char-style' test document holds one run per style in its first
    paragraph, in the order listed below.
    """
    ordered_styles = ('None', 'Emphasis', 'Strong')
    index_by_style = {name: position for position, name in enumerate(ordered_styles)}
    run_idx = index_by_style[char_style]
    first_paragraph = Document(test_docx('run-char-style')).paragraphs[0]
    context.run = first_paragraph.runs[run_idx]
# Read a docx file with Python.
# 1. The python-docx library has to be installed first.
from docx import Document

document = Document("d:\\1.docx")
for para in document.paragraphs:
    # Paragraph objects have no append() method (it raised AttributeError);
    # add_run() is the python-docx call that appends text to a paragraph.
    para.add_run('dd')
    print(para.text)
def write_report(info_request,print_to_pdf=True,\
        path_report_folder=os.path.join(os.environ['CRF_DATA'],'Request_reports')):
    """Generate the Word report (and the e-mail list) for one request.

    Args:
        info_request: Nested dict describing the request. The keys read here
            are 'table_finished', 'final_validation_done', 'request_number',
            'final_outcome', 'description_task', 'description_success',
            'requester', 'volunteer', 'date', 'doi' and one entry per staff
            role ('receiver', 'advisor_manager', 'advisor', 'coordinator',
            'follow_up', 'documenter'). The outcome
            'Resolved: Task successfully finished' is rewritten in place to
            'Success'.
        print_to_pdf: When true, the saved report is also passed to
            ``printout_file``.
        path_report_folder: Folder that receives the report and e-mail files.
            The default is computed once, when the function is defined, from
            the CRF_DATA environment variable.

    The report starts as a copy of the template ``xxx_report.docx`` and is
    saved as ``<request_number>_Report_<timestamp>.docx``; the e-mail list
    uses the same name with an ``_emails.txt`` ending.
    """
    date_format = '%d/%m/%Y %H:%M'

    def event_time(event):
        """Parse the first recorded date of `event` ('request', 'proposal', ...)."""
        return datetime.strptime(info_request['date'][event][0], date_format)

    if info_request['table_finished'][0].lower() != 'yes':
        print('WARNING!! Table is marked as NOT FINISHED.')
    if info_request['final_validation_done'][0].lower() != 'yes':
        print('WARNING!! Table is marked as NOT VALIDATED.')

    footnote_list = []
    now = datetime.now()
    name_file_report = info_request['request_number'][0] + '_Report_' \
        + now.strftime('%Y%m%dT%H%M%S')
    name_file_emails = name_file_report + '_emails'
    path_file_report = os.path.join(path_report_folder, name_file_report + '.docx')
    path_file_emails = os.path.join(path_report_folder, name_file_emails + '.txt')

    # The report is built on top of a copy of the template document.
    copyfile(r'xxx_report.docx', path_file_report)
    document = Document(path_file_report)
    document.styles['Normal'].font.name = 'Raleway'
    document.styles['Normal'].font.size = Pt(12)

    if info_request['final_outcome'][0] == 'Resolved: Task successfully finished':
        info_request['final_outcome'][0] = 'Success'

    # Title
    p = document.add_paragraph()
    run = p.add_run('Request ' + info_request['request_number'][0])
    run.font.bold = True
    run.font.size = Pt(16)
    p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Basic info about the request
    p = document.add_paragraph()
    p = document.add_paragraph()
    p.add_run('Request description: ').bold = True
    p.add_run(info_request['description_task'][0])
    p = document.add_paragraph()
    p.add_run('Requester\'s institution: ').bold = True
    p.add_run(info_request['requester']['full_affiliation'][0])
    p = document.add_paragraph()
    p.add_run('Outcome: ').bold = True
    # chr(8212) is an em dash between outcome and its description.
    p.add_run(info_request['final_outcome'][0] + chr(8212)
              + info_request['description_success'][0])
    p = document.add_paragraph()

    # Volunteers
    p = document.add_paragraph()
    p.add_run('Volunteers:').bold = True
    footnote_list_role = []
    any_role = False
    roles = info_request['volunteer']['role']
    for i_volunteer in range(len(info_request['volunteer']['email'])):
        p = document.add_paragraph()
        # A role footnote is attached only when a non-empty role is recorded
        # for this volunteer.
        if len(roles) > i_volunteer and roles[i_volunteer]:
            add_footnote(p, footnote_list_role, roles[i_volunteer],
                         type_symbol='symbol')
            any_role = True
        p.add_run(
            write_name_and_affiliation(info_request['volunteer'], i_volunteer))

    # Volunteers' roles
    if any_role:
        p = document.add_paragraph()
        p = document.add_paragraph()
        p.add_run('Volunteers\' roles:').bold = True
        print_footnotes(document, footnote_list_role, font_size=12)

    # Timeline
    p = document.add_paragraph()
    p = document.add_paragraph()
    p.add_run('Timeline:').bold = True
    if info_request['date']['request'][0] != 'none':
        p = document.add_paragraph(
            'Date of request: ' + event_time('request').strftime('%B %d, %Y'))
        deltaT = event_time('first_response') - event_time('request')
        p = document.add_paragraph(
            deltaT_to_string(deltaT) + ' from request to first response')
        add_footnote(
            p, footnote_list,
            'The first response from Crowdfight typically contains a feasibility estimate for the request.'
        )
        deltaT = event_time('proposal') - event_time('first_response')
        p = document.add_paragraph(
            deltaT_to_string(deltaT) + ' from first response to proposal')
        add_footnote(
            p, footnote_list,
            'The proposal typically includes making first contact between requester and volunteers.'
        )
        deltaT = event_time('success') - event_time('proposal')
        p = document.add_paragraph(
            deltaT_to_string(deltaT) + ' from proposal to resolution')
    else:
        p = document.add_paragraph('Date of request: None')
    p = document.add_paragraph('Date of resolution')
    add_footnote(
        p, footnote_list,
        'For successful requests, this is the date in which we confirmed that the success criterion was reached (we try to be unintrusive during follow-up, so this confirmation can have significant delay). The success criterion is defined beforehand, and is as stringent as possible to guarantee that our activity is truly useful.'
    )
    p.add_run(': ' + event_time('success').strftime('%B %d, %Y'))

    # Crowdfight staff: (key in info_request, label, footnote text)
    p = document.add_paragraph()
    staff_roles = [
        ('receiver', 'Receiver',
         'The Receiver is in charge of receiving the request, understanding it (with help of other coordinators and advisors), and writing the request to be sent in the task distribution e-mail (when appropriate).'),
        ('advisor_manager', 'Advisor manager',
         'The advisor manager handles the assignment of requests to the scientific advisors'),
        ('advisor', 'Scientific advisor',
         'The scientific advisor reviews the self-assessments submitted by the volunteers, shortlists the most adequate ones for the request, and recommends a course of action to the coordinator.'),
        ('coordinator', 'Coordinator',
         'The coordinator decides a course of action with help from the scientific advisor, makes a proposal to the requester and contacts the selected volunteers.'),
        ('follow_up', 'Follow up',
         'The person in charge of follow-up stays in touch with requester and volunteers to make sure they have everything they need until the resolution of the request.'),
        ('documenter', 'Documenter',
         'The documenter maintains our database and gathers the necessary information to produce this report.'),
    ]
    for person_key, person_label, person_footnote in staff_roles:
        p = document.add_paragraph()
        p.add_run(person_label).bold = True
        add_footnote(p, footnote_list, person_footnote, bold=True)
        p.add_run(': ').bold = True
        p.add_run(write_name_and_affiliation(info_request[person_key], 0))

    # Notes
    p = document.add_paragraph()
    p = document.add_paragraph()
    p.add_run('Notes:').bold = True
    print_footnotes(document, footnote_list)

    p = document.add_paragraph()
    p = document.add_paragraph()
    if not info_request['doi']['static']:
        doi_static = 'xxxx'
    else:
        # Take the last (we'll keep adding as we make new versions)
        doi_static = info_request['doi']['static'][-1]
    if not info_request['doi']['updated']:
        doi_updated = 'xxxx'
    else:
        doi_updated = info_request['doi']['updated'][0]
    # The generation time comes from `now`; it used to be a fixed literal
    # ("08-Jun-2020 15:23:09"), so every report carried the same stale date.
    run = p.add_run('This report belongs to the internal documentation of Crowdfight, a non-for profit association registered in the EU with number ES-G47805569. Reports are published for transparency with the explicit agreement of all people mentioned. The report was generated automatically from our records on '
                    + now.strftime('%d-%b-%Y %H:%M:%S')
                    + '. While we do our best to keep faithful records, it may contain unintentional inaccuracies or omissions. Any modification of this document is strictly forbidden. This document can be found at https://doi.org/'
                    + doi_static + ', and up to date versions can be found at https://doi.org/'
                    + doi_updated + '.')
    run.font.size = Pt(9)

    document.save(path_file_report)
    print_emails(info_request, file_path=path_file_emails)
    if print_to_pdf:
        printout_file(path_file_report, wait_for_pdf=True)
# Split nameList into batches of `num` file names; every batch becomes one
# merged document. The last batch may be shorter.
tempList = []
num = 30
for index in range(0, len(nameList), num):
    docxList.append(list(nameList[index:index + num]))

for a, batch in enumerate(docxList):
    # All backslashes are escaped explicitly: the former literal relied on
    # invalid escape sequences (\D, \p, \w, ...) being kept as written.
    docName = "F:\\DiYanYuan\\python\\word\\第一次\\大于400\\all_合并\\" + str(a) + ".docx"
    # Start from a new blank document
    new_document = Document()
    composer = Composer(new_document)
    for file in batch:
        composer.append(Document(file))
    composer.save(docName)

# Incorrect: the contents overlapped
# a = 0
# while a <len(docxList):
#     docName = "F:\DiYanYuan\python\word\第一次\大于400\\all_合并\\" + str(a) + ".docx"
#     # Start the Word application
#     word = win32com.client.Dispatch('Word.Application')
#     word.Visible = False
#     # Create a new blank document
pat = '结算通知书' m = re.search(pat, dir.decode('gbk').encode('utf8')) if m: allfile.append(dir) # 获得所有文档名中带有结算通知书的文件 #print allfile print len(allfile) findf = [] for dir in allfile: try: #document=Document(dir.decode("utf8")) temp = 0 #document=Document(path+"\\"+dir) document = Document(dir) doc_new = Document() t = document.tables[0] for r in t.rows: for c in r.cells: doc_new.add_paragraph(c.text, style=None) a1.append(doc_new.paragraphs[1].text) findf.append(1) for inx, p in enumerate(doc_new.paragraphs): tt = p.text pat = '利率收益金额'.decode("utf8") m = re.search(pat, tt) if m: if temp == 0:
def _datasheet_pdf_response(pdf_path):
    """Return an ``application/pdf`` HttpResponse holding the bytes of *pdf_path*."""
    with open(pdf_path, 'rb') as pdf:
        response = HttpResponse(pdf.read(), content_type='application/pdf')
    response['Content-Disposition'] = 'filename=some_file.pdf'
    return response


def data_sheet(request, id):
    """Serve, convert, or build the TA data sheet, depending on what exists on disk.

    The function works in three stages, chosen by which files are present:

    1. If ``TA_Datasheet.pdf`` exists, it is returned as a PDF response.
    2. Otherwise, if ``TA_Datasheet.docx`` exists, it is converted to PDF
       through Word COM automation and the PDF is returned.
    3. Otherwise the DOCX is built: the "DS template" is rendered with fields
       of the applicant's ``TAapplicationmodel`` row, the encrypted
       "Annexure 6" file is decrypted, its body elements are appended to the
       rendered document, and a page asking the user to click again is
       rendered.

    Args:
        request: The incoming request; ``request.POST['idprefix']`` is required.
        id: User id used to look up the application and its files.

    Returns:
        An ``HttpResponse`` with the PDF (stages 1 and 2) or the rendered
        ``'tcs do/receivedtyperecord.html'`` page (stage 3).

    Raises:
        KeyError: If ``idprefix`` is missing from the POST data.
        AttributeError: If either database lookup returns no row
            (``.first()`` yields ``None``).
    """
    idprefix = request.POST['idprefix']
    print(idprefix, 'jjjjjjjjjjjj')
    doc_final_path = 'E:/certa-drdo/certa/TA_Datasheet.docx'
    pdf_final_path = 'E:/certa-drdo/certa/TA_Datasheet.pdf'

    # Stage 1: a PDF was already produced -- just stream it.
    if os.path.isfile(pdf_final_path):
        return _datasheet_pdf_response(pdf_final_path)

    # Stage 2: only the DOCX exists -- convert it to PDF with Word.
    if os.path.isfile(doc_final_path):
        print('mmmmmmmmmmmmmm')
        pythoncom.CoInitialize()
        wdFormatPDF = 17  # value passed to Word's SaveAs as FileFormat
        in_file = os.path.abspath(doc_final_path)
        word = comtypes.client.CreateObject('Word.Application')
        # try/finally ensures the document and the Word process are released
        # even when opening or saving fails.
        try:
            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(pdf_final_path, FileFormat=wdFormatPDF)
                print('nnnnnnnnnnn')
            finally:
                doc.Close()
        finally:
            word.Quit()
        return _datasheet_pdf_response(pdf_final_path)

    # Stage 3: nothing exists yet -- build the DOCX from the template.
    taa = TAapplicationmodel.objects.filter(user_id=id).first()
    template = DocxTemplate("E:/certa-drdo/certa/dashboard/templates/dealing officer/DS template.docx")
    context = {
        'firmname': taa.firmname,
        'addr1': taa.addr1,
        'item_name': taa.item_name,
        'part_no': taa.part_no,
    }
    template.render(context)
    # Save to the same absolute path that is reopened below and checked by
    # stage 2; the earlier relative "TA_Datasheet.docx" only matched it when
    # the process happened to run from that directory.
    template.save(doc_final_path)

    taa = TAapplicationfiles.objects.filter(user_id=id, refid=idprefix, refpath='Annexure 6').first()
    aesurl = taa.filepath
    # Drops the last four characters of the encrypted file's path
    # (presumably an ".aes" suffix -- TODO confirm).
    docurl = aesurl[:-4]
    print('aesview', aesurl)
    print('docurl', docurl)

    bufferSize = 64 * 1024
    # NOTE(security): the decryption passphrase is hard-coded in source; it
    # should be loaded from protected configuration instead.
    passw = "#EX\xc8\xd5\xbfI{\xa2$\x05(\xd5\x18\xbf\xc0\x85)\x10nc\x94\x02)j\xdf\xcb\xc4\x94\x9d(\x9e"
    encFileSize = stat(aesurl).st_size
    with open(aesurl, "rb") as fIn, open(docurl, "wb") as fOut:
        pyAesCrypt.decryptStream(fIn, fOut, passw, bufferSize, encFileSize)

    templateDoc1 = Document(doc_final_path)
    templateDoc = Document(docurl)
    # Snapshot the children first: append() relocates each element out of the
    # source body, so the source must not be iterated live while it shrinks.
    for element in list(templateDoc.element.body):
        templateDoc1.element.body.append(element)
    templateDoc1.save(doc_final_path)

    messages.success(request, 'Data_sheet Successfully Prepared, Click again to view the file !')
    reg = TAapplicationmodel.objects.filter(file_in_id=str(request.user.id))
    return render(request, 'tcs do/receivedtyperecord.html', {'details': reg, 'status': True})
fontsize=4.5, verticalalignment='center', horizontalalignment='center') plt.axis('off') plt.savefig(f'{path}/{jpgname}.jpg') plt.cla() plt.close("all") im = Image.open(f'{path}/{jpgname}.jpg') im = trim(im) im.save(f'{path}/{jpgname}.jpg') return im.size[0] / 96 print('文档格式初始化……') # 创建文档对象 calc_book = Document() # 设置正文字体 calc_book.styles['Normal'].font.name = 'Italic' calc_book.styles['Normal'].element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑') calc_book.styles['Normal'].font.size = Pt(12) calc_book.styles['Normal'].font.color.rgb = RGBColor(0x00, 0x00, 0x00) calc_book.styles[ 'Normal'].paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT # 首行缩进,字符宽度等于字符高度,12pt=4.23mm, 1pt=0.3527mm calc_book.styles['Normal'].paragraph_format.first_line_indent = Mm(8.46) calc_book.styles[ 'Normal'].paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE calc_book.styles['Normal'].paragraph_format.space_before = Mm(0) calc_book.styles['Normal'].paragraph_format.space_after = Mm(0) # 增加一个居中显示的样式 calc_book.styles['No Spacing'].paragraph_format.first_line_indent = Mm(0)
import requests, re from math import * from bs4 import BeautifulSoup # HTML을 파싱하는 모듈 import os, random, itertools, time from docx import Document from docx.shared import Pt from docx.enum.text import WD_ALIGN_PARAGRAPH ### 시간 측정 ### first_time = time.time() ## 파일 생성## templit = Document("English_Composition.docx") check = Document() def googling(key1=str): url = "http://www.google.com/search?q=" + key1 headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'} html = requests.get(url, headers=headers ).text soup = BeautifulSoup(html, 'html.parser') div = soup.find('div', id='result-stats') if not div: return 0 result_characters = " ".join(div.text.replace(",","")).split(" ") for order in range(len(result_characters)): word = result_characters[order] if not word in [str(number) for number in range(0,10)]: if word == "개":
msg_list["PDN CONNECTIVITY REJECT"]["table"] = 56 msg_list["PDN CONNECTIVITY REQUEST"]["table"] = 57 msg_list["PDN DISCONNECT REJECT"]["table"] = 58 msg_list["PDN DISCONNECT REQUEST"]["table"] = 59 for key in msg_list.keys(): if "table" not in msg_list[key].keys(): continue; d_info("[" + key + "]") cachefile = cachedir + "nas-msg-" + msg_list[key]["type"] + ".py" if os.path.isfile(cachefile) and os.access(cachefile, os.R_OK): execfile(cachefile) print "Read from " + cachefile else: document = Document(filename) f = open(cachefile, 'w') ies = [] write_file(f, "ies = []\n") table = document.tables[msg_list[key]["table"]] start_row = 0 for start_row, row in enumerate(table.rows): cells = get_cells(row.cells); if cells["type"].find('Message type') != -1: break if cells["type"].find('KSI and sequence number') != -1: start_row -= 1 break
from docx import Document from docx.shared import Inches document = Document() document.add_heading('Document Title', 0) p = document.add_paragraph('A plain paragraph having some ') p.add_run('bold').bold = True p.add_run(' and some ') p.add_run('italic.').italic = True document.add_heading('Heading, level 1', level=1) document.add_paragraph('Intense quote', style='Intense Quote') document.add_paragraph('first item in unordered list', style='List Bullet') document.add_paragraph('first item in ordered list', style='List Number') document.add_picture('t_0.png', width=Inches(1.25)) records = ((3, '101', 'Spam'), (7, '422', 'Eggs'), (4, '631', 'Spam, spam, eggs, and spam')) table = document.add_table(rows=1, cols=3) hdr_cells = table.rows[0].cells hdr_cells[0].text = 'Qty' hdr_cells[1].text = 'Id' hdr_cells[2].text = 'Desc' for qty, id, desc in records: row_cells = table.add_row().cells row_cells[0].text = str(qty)
특허총칙.docx 특허요건.docx 특허심사.docx 특허등록.docx 특허권.docx 특허권자의보호.docx 특허취소신청.docx 특허심판.docx 특허재심.docx 특허소송.docx 특허국제출원.docx 특허보칙.docx 특허벌칙.docx """ templit = Document('특허취소신청.docx') Result = Document('Today.docx') Keyword = None ################################################################################## """ https://m.blog.naver.com/anakt/221842622079 다음 조항으로 넘어갈 때 '문제 유닛'을 클래스 선언하고, 이전 '문제 유닛'을 '문제 리스트'에 넣는다. """ def data_acquisition(templit): Problem_list = [] Problem_unit = None for row, paragraph in enumerate(templit.paragraphs):