def handle_p_content(self, e, current_part):
    """Convert one child element of a paragraph into a list of output nodes.

    Dispatches on ``e.tag``: runs go to ``handle_run``, hyperlinks become
    ``a`` elements with an ``href``, bookmark starts become named ``a``
    anchors, bookmark ends produce nothing, and ``oMath`` goes to
    ``handle_omath``.  Anything else is logged and dropped.

    ``current_part`` names the document part whose relationships are used
    to resolve hyperlink targets given by relationship id.
    """
    tag = e.tag
    if tag == RUN_TAG:
        return self.handle_run(e)
    if tag == HYPERLINK_TAG:
        rel_id = e.attrib.get(ns.r('id'))
        if rel_id is not None:
            # External target: look it up in the relationships of the part.
            rels = self.doc.get_rels_for(current_part)
            ref = rels[rel_id].attrib['Target']
        else:
            # No relationship id: link to an anchor within the document.
            ref = '#' + e.attrib[ns.w('anchor')]
        # 'u', 'span' = nuke bogus color and underline
        # styling that google docs likes to add to links;
        # XXX(alexander): rewrite colour less bluntly;
        # this also nukes background color
        recurse = partial(self.handle_p_content, current_part=current_part)
        body = whack(('u', 'span').__contains__, flatmap(recurse, e))
        if not body:
            log.warn('hyperlink with no body to: %r', ref)
        return [mkel('a', {'href': ref}, body)]
    if tag == BOOKMARK_END_TAG:
        return []
    if tag == BOOKMARK_START_TAG:
        return [mkel('a', {'name': e.attrib[ns.w('name')]}, [])]
    if tag == ns.m('oMath'):
        return self.handle_omath(e)
    log.warn('Ignoring unknown tag %s', e.tag)
    return []
def get_num_style(self, numid, level):
    """Return the ``NumStyle`` for numbering ``numid`` at list ``level``.

    The ``w:num`` entry for ``numid`` is resolved to its abstract numbering
    id, then ``w:numFmt`` and ``w:lvlText`` are read from the matching
    ``w:lvl`` element.  Exactly one level must match; otherwise the
    unpacking below raises ``ValueError``.
    """
    root = self.numbering.e
    abstract_ref = root.find(self.A_NUMID_XPATH_TEMPL % numid)
    abstract_num_id = abstract_ref.attrib[ns.w('val')]
    # One-element unpacking insists on a single matching level.
    [lvl] = self.numbering.e.iterfind(
        self.LVL_XPATH_TEMPL % (abstract_num_id, level))
    return NumStyle(numFmt=val(lvl, ns.w('numFmt')),
                    lvlText=val(lvl, ns.w('lvlText')))
def val(e, child_tag, attrib=ns.w('val')):
    """Return ``attrib`` of the first ``child_tag`` element reached via ``e.iter``.

    Returns ``None`` when ``e`` is ``None``, when no element matches, or
    when the matching element does not carry ``attrib``.
    """
    if e is None:
        return None
    # NOTE(review): with ElementTree-style elements ``iter`` also visits
    # ``e`` itself and all descendants, not only direct children -- confirm
    # callers rely on that.
    first = next(e.iter(child_tag), None)
    if first is None:
        return None
    return first.attrib.get(attrib)
def make_footnote(self, e):
    """Build a ``.footnote`` element from a footnote or endnote reference.

    ``e`` is the reference element; its ``w:id`` selects the note.  A
    footnote reference is looked up with ``self.doc.get_footnote``; any
    other reference is treated as an endnote and looked up with
    ``self.doc.get_endnote``.  Each ``P_TAG`` paragraph found in the note is
    converted with ``handle_p``.

    The part name passed to ``handle_p`` follows the kind of note, so that
    hyperlinks inside an endnote are resolved against the endnotes
    relationships rather than the footnotes ones.
    """
    note_id = e.attrib[ns.w('id')]
    if e.tag == FOOTNOTE_REFERENCE_TAG:
        get_note, part = self.doc.get_footnote, 'footnotes'
    else:
        # Endnotes are a separate part with their own relationships.
        get_note, part = self.doc.get_endnote, 'endnotes'
    paragraphs = get_note(note_id).iterfind(P_TAG)
    return mkel('.footnote', {},
                [self.handle_p(p, current_part=part) for p in paragraphs])
def handle_p(self, e, current_part, in_list=False):
    """Convert paragraph element ``e`` into an output element.

    The justification (``w:jc``) is mapped to a class through
    ``JC_TO_CLASS`` and the paragraph style (``w:pStyle``) to a tag through
    ``style_to_tag``.  Children after the paragraph properties (or all
    children when there are none) are converted with ``handle_p_content``.

    Outside of lists, a non-zero left indent (in units of
    ``self.default_indent_twips``, rounded) wraps the result in a
    ``.block`` element carrying that indent.
    """
    pPr = first_of_tag(e, P_PROPS_TAG)
    jc_class = self.JC_TO_CLASS.get(val(pPr, ns.w('jc')))
    attrs = add_class({}, jc_class) if jc_class else {}
    tag = style_to_tag(val(pPr, ns.w('pStyle')) or '')
    if pPr is None:
        content = iter(e)
    else:
        # Skip the properties element itself; only its siblings are content.
        content = pPr.itersiblings()
    convert = partial(self.handle_p_content, current_part=current_part)
    ans = mkel(tag, attrs, flatmap(convert, content))

    left_indent = val(pPr, ns.w('ind'), ns.w('left')) or 0.0
    indent = int(round(float(left_indent) / self.default_indent_twips))
    if in_list or not indent:
        return ans
    ans = lift_code(ans)
    ans = mkel('.block', {'indent': indent}, [ans])
    return hacky_flatten_block(ans)
def process(self, e, handle_p):
    """Feed paragraph ``e`` to the list builder.

    A paragraph without a ``w:numId`` is not a list item: whatever
    ``self.flush()`` returns is emitted, followed by the paragraph converted
    with ``handle_p(e, in_list=False)``.

    A list item is stored on the current append point as a
    ``(list_type, converted paragraph)`` pair after the append-point stack
    has been grown or shrunk to the item's ``w:ilvl``; ``[]`` is returned.
    """
    numPr = e.find(P_PROPS_TAG + '/' + ns.w('numPr'))
    numid = val(numPr, ns.w('numId'))
    if not numid:
        flushed = self.flush()
        return flushed + [handle_p(e, in_list=False)]

    self.in_list = True  # pylint: disable=W0201
    level = int(val(numPr, ns.w('ilvl')) or 0)
    while self.level != level:
        if self.level < level:
            # Go one level deeper: open a nested list inside the current one
            # and make it the new innermost append point.
            self.append_points[-1].append([])
            nested = self.append_points[-1][-1]
            self.append_points.append(nested)
        else:
            self.append_points.pop()
    target = self.append_points[self.level]
    kind = self.list_type(numid, level)
    target.append((kind, handle_p(e, in_list=True)))
    return []
def parse_sectPr(e):  # pylint: disable=C0103
    """Read page width and left/right margins from a ``w:sectPr`` element.

    Returns a ``SectPr`` whose ``page_width``, ``left_margin`` and
    ``right_margin`` are ``Twips`` built from ``w:pgSz/@w:w``,
    ``w:pgMar/@w:left`` and ``w:pgMar/@w:right`` respectively.
    """
    assert e.tag == ns.w('sectPr')
    lookups = (
        ('page_width', 'pgSz', 'w'),
        ('left_margin', 'pgMar', 'left'),
        ('right_margin', 'pgMar', 'right'),
    )
    fields = {}
    for name, child, attr in lookups:
        fields[name] = Twips(val(e, ns.w(child), ns.w(attr)))
    return SectPr(**fields)
def handle_run(self, r):
    """Convert run element ``r`` into a list of strings and elements.

    Each child after the run properties (or every child when there are
    none) contributes text, a tab, a page break, a line break, a hyphen
    character, transcluded images or a footnote; unknown children are
    logged.  When run properties exist they are applied with ``apply_rpr``,
    unless the result compares equal to the footnote pattern below.
    """
    # XXX(ash): pylint is right about this being too complex
    # pylint: disable=R0912
    _ = Var('_')
    rPr = first_of_tag(r, RUN_PROPS_TAG)
    children = iter(r) if rPr is None else rPr.itersiblings()
    ans = []
    for e in children:
        tag = e.tag
        break_type = e.attrib.get(ns.w('type'))
        if tag == TEXT_TAG:
            ans.append(e.text)
        elif tag == TAB_TAG:
            # XXX(alexander): this can also work like a '_' or '…' \dotfill
            ans.append('\t')
        elif tag in (FOOTNOTE_REF_TAG, ENDNOTE_REF_TAG):
            # XXX(ash): what is going on here
            pass
        elif tag == BREAK_TAG and break_type in ('page', 'column'):
            ans.append(mkel('.pagebreak', {}, []))
        elif tag in (BREAK_TAG, CR_TAG):
            assert (break_type is None) or (break_type == 'textWrapping')
            ans.append(mkel('br', {}, []))
        # FIXME, tags below untested
        elif tag == SOFT_HYPHEN_TAG:
            ans.append(SOFT_HYPHEN)
        elif tag == NON_BREAKING_HYPHEN_TAG:
            ans.append(NON_BREAKING_HYPHEN)
        elif tag == ns.w('drawing'):
            pictures = e.xpath(self.IMAGE_XPATH, namespaces=ns.dict)
            ans.extend(flatmap(self.transclude, pictures))
        elif tag in (FOOTNOTE_REFERENCE_TAG, ENDNOTE_REFERENCE_TAG):
            ans.append(self.make_footnote(e))
        else:
            # movie,
            # rt, ruby, rubyAlign etc. for ruby stuff
            # sym, with special handling for wingdings I guess...
            log.warn('Unknown tag %r', e.tag)
    # The `!=` form is kept on purpose: the pattern object decides how the
    # comparison behaves.
    if rPr is not None and ans != Seq[Seq['.footnote', _:], _:]:
        ans = self.apply_rpr(rPr, ans)
    return ans
def parse_table(self, e, current_part):
    """Convert table element ``e`` into a ``table`` element.

    Column widths from ``w:tblGrid`` become ``col`` elements with a
    percentage width.  Cells become ``th`` when ``w:tblLook`` marks the
    first row / first column as headers and the cell lies in it, ``td``
    otherwise; cell content is converted with ``parse_body``.  The columns
    and rows are passed through ``odt_parser.parse_table_body``.
    """
    # XXX(ash): simplify
    # pylint: disable=R0914
    def cell_bg(tc):
        # Background attrs from the cell properties' shading fill, if any.
        props = tc[0]
        if props.tag == TABLE_COLUMN_PROPERTIES_TAG:
            fill = val(props, ns.w('shd'), ns.w('fill'))
            if fill:
                return add_bg({}, '#' + fill)
        return {}

    def skip_past(e, child):
        # Iterate the children of `e`, leaving out a leading `child` element.
        first = e[0]
        if first.tag == child:
            return first.itersiblings()
        return e.iterchildren()

    def parse_rows(e, has_header_row, has_header_col):
        rows = []
        for i, tr in enumerate(e.iterfind(TABLE_ROW_TAG)):
            cells = []
            for j, tc in enumerate(tr.iterfind(TABLE_COLUMN_TAG)):
                header = ((i == 0 and has_header_row)
                          or (j == 0 and has_header_col))
                cell_tag = 'th' if header else 'td'
                attrs = cell_bg(tc)
                content = self.parse_body(
                    skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG),
                    current_part=current_part)
                cells.append(mkel(cell_tag, attrs, content))
            rows.append(mkel('tr', {}, cells))
        return rows

    tblPr = first_of_tag(e, ns.w('tblPr'))
    tblGrid = next(tblPr.itersiblings())
    # according to the schema this is always true
    assert tblGrid.tag == ns.w('tblGrid'), tblGrid.tag

    look = tblPr.find(ns.w('tblLook'))
    if look is not None:
        # this is actually the canonical check;
        # the identical per cell/row props are just for caching
        row_key, col_key = ns.w('firstRow'), ns.w('firstColumn')
        has_header_row = look.attrib.get(row_key) == "1"
        has_header_col = look.attrib.get(col_key) == "1"
    else:
        has_header_row = has_header_col = False

    col_widths = [int(gc.attrib[ns.w('w')])
                  for gc in tblGrid.iterchildren(ns.w('gridCol'))]
    col_total = sum(col_widths)
    col_pcts = [100. * w / col_total for w in col_widths]
    cols = []
    for pct in col_pcts:
        cols.append(mkel('col', add_style({}, 'width', '%s%%' % pct), []))
    rows = parse_rows(e, has_header_row, has_header_col)
    return mkel('table', {}, odt_parser.parse_table_body(cols + rows))
def parse_table(self, e, current_part):
    """Convert table element ``e`` into a ``table`` element.

    ``w:tblGrid`` column widths are turned into ``col`` elements whose
    width is a percentage of the total.  Each row becomes a ``tr`` of
    ``th``/``td`` cells (``th`` for the first row / first column when
    ``w:tblLook`` flags them), with cell content converted by
    ``parse_body``.  Columns plus rows go through
    ``odt_parser.parse_table_body`` before being wrapped.
    """
    # XXX(ash): simplify
    # pylint: disable=R0914
    def cell_bg(tc):
        # Attrs carrying the cell's shading fill, or {} when there is none.
        props = tc[0]
        if props.tag != TABLE_COLUMN_PROPERTIES_TAG:
            return {}
        fill = val(props, ns.w('shd'), ns.w('fill'))
        if not fill:
            return {}
        return add_bg({}, '#' + fill)

    def skip_past(e, child):
        # Children of `e` without a leading `child` element.
        head = e[0]
        return head.itersiblings() if head.tag == child else e.iterchildren()

    def parse_rows(e, has_header_row, has_header_col):
        def is_header(i, j):
            return (i == 0 and has_header_row) or (j == 0 and has_header_col)

        def parse_cell(i, j, tc):
            cell_tag = 'th' if is_header(i, j) else 'td'
            attrs = cell_bg(tc)
            inner = skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG)
            return mkel(cell_tag, attrs,
                        self.parse_body(inner, current_part=current_part))

        parsed = []
        for i, tr in enumerate(e.iterfind(TABLE_ROW_TAG)):
            cells = [parse_cell(i, j, tc)
                     for j, tc in enumerate(tr.iterfind(TABLE_COLUMN_TAG))]
            parsed.append(mkel('tr', {}, cells))
        return parsed

    tblPr = first_of_tag(e, ns.w('tblPr'))
    tbl_stuff = tblPr.itersiblings()
    tblGrid = next(tbl_stuff)
    # according to the schema this is always true
    assert tblGrid.tag == ns.w('tblGrid'), tblGrid.tag

    has_header_row = has_header_col = False
    look = tblPr.find(ns.w('tblLook'))
    if look is not None:
        # this is actually the canonical check;
        # the identical per cell/row props are just for caching
        first_row, first_col = ns.w('firstRow'), ns.w('firstColumn')
        has_header_row = look.attrib.get(first_row) == "1"
        has_header_col = look.attrib.get(first_col) == "1"

    width_attr = ns.w('w')
    col_widths = [int(gc.attrib[width_attr])
                  for gc in tblGrid.iterchildren(ns.w('gridCol'))]
    col_total = sum(col_widths)
    col_pcts = [100. * w / col_total for w in col_widths]
    cols = [mkel('col', add_style({}, 'width', '%s%%' % pct), [])
            for pct in col_pcts]
    rows = parse_rows(e, has_header_row, has_header_col)
    table = odt_parser.parse_table_body(cols + rows)
    return mkel('table', {}, table)
def __init__(self, infilename, make_transclusions):
    """Open ``infilename`` and cache the pieces the parser needs.

    ``make_transclusions``, when truthy, is called with the document's
    images (``self.doc.get_images()``) and its result is kept in
    ``self.transclusions``; otherwise ``self.transclusions`` is ``None``.

    The text width is the page width minus both side margins, taken from
    the section properties expected as the last child of the body.
    """
    doc = docxlite.Document(infilename)
    self.doc = doc
    # pylint: disable=W0212
    self.numbering = doc.numbering
    self.body = doc.document.e.find(ns.w('body'))
    self.rels = doc.document.rels

    sprops = docxlite.parse_sectPr(self.body[-1])
    page_width = sprops.page_width.emu.real
    self.textwidth_emu = (page_width
                          - sprops.right_margin.emu.real
                          - sprops.left_margin.emu.real)

    self.transclusions = (make_transclusions(self.doc.get_images())
                          if make_transclusions else None)
    # Width of one indent step, used by handle_p to quantize left indents.
    self.default_indent_twips = 720
def apply_rpr(self, rPr, ans):
    """Wrap the run content ``ans`` according to run properties ``rPr``.

    Wrappers are applied from the inside out in this order: the simple
    styles of ``STYLE_TO_HTML``, text colour, background (highlight or,
    failing that, shading fill), vertical alignment, and finally ``code``
    when the ASCII font is a code font.  Returns the (possibly wrapped)
    content list.
    """
    present = {x.tag for x in rPr.iterchildren(*self.STYLE_TO_HTML)}
    for style_tag, html in self.STYLE_TO_HTML.iteritems():
        if style_tag in present:
            ans = [mkel(html, {}, ans)]

    color = val(rPr, ns.w('color'))
    if color:
        # FIXME word colors
        ans = [mkel('span', add_style({}, 'color', '#' + color), ans)]

    # `None` here == turn highlighting off; it's different from no value,
    # which is signalled by the `False` default.
    named = val(rPr, ns.w('highlight'))
    highlight = self.HIGHLIGHT_TO_RGB.get(named, False)
    if highlight is False:
        # highlight has higher precedence than shade, so the shading fill
        # is consulted only when there is no (known) highlight value
        highlight = val(rPr, ns.w('shd'), ns.w('fill'))
    if highlight:
        ans = [mkel('span', add_bg({}, '#' + highlight), ans)]

    vertalign = val(rPr, ns.w('vertAlign'))
    if vertalign and vertalign != 'baseline':
        # The first three letters of the value are used as the tag name.
        ans = [mkel(vertalign[:3], {}, ans)]

    font = val(rPr, ns.w('rFonts'), ns.w('ascii'))
    if is_code_font(font):
        ans = [mkel('code', {}, ans)]
    return ans
class Document(object):
    """A docx package held as a zip mapping plus its main parsed parts.

    The document, numbering, footnotes and endnotes parts are loaded with
    ``get_part``; each is used through its ``e`` (element tree), ``rels``,
    ``path`` and ``rels_path`` attributes.
    """

    # XPath templates with the `w:` prefix expanded to the namespace form
    # produced by ns.w('').
    LVL_XPATH_TEMPL = ('./w:abstractNum[@w:abstractNumId="%s"]/'
                       'w:lvl[@w:ilvl="%s"]'.replace('w:', ns.w('')))
    A_NUMID_XPATH_TEMPL = ('./w:num[@w:numId="%s"]/w:abstractNumId'
                           .replace('w:', ns.w('')))

    def __init__(self, path_or_file):
        """Read the zip at ``path_or_file`` and load the known parts."""
        self.z = read_zip(path_or_file)
        prefix = MAGIC_WORD
        self.document = get_part(self.z, prefix + '/document.xml')
        self.numbering = get_part(self.z, prefix + '/numbering.xml')
        self.footnotes = get_part(self.z, prefix + '/footnotes.xml')
        self.endnotes = get_part(self.z, prefix + '/endnotes.xml')

    @staticmethod
    def _get_by_id(id, xs):  # pylint: disable=W0622
        """Return the first item of ``xs`` whose ``w:id`` equals ``id``.

        Raises ``StopIteration`` when nothing matches.
        """
        id_attr = ns.w('id')
        matches = (x for x in xs if id == x.attrib[id_attr])
        return next(matches)

    def get_footnote(self, id):  # pylint: disable=W0622
        """Return the footnote element with the given ``w:id``."""
        return self._get_by_id(id, self.footnotes.e)

    def get_endnote(self, id):  # pylint: disable=W0622
        """Return the endnote element with the given ``w:id``."""
        return self._get_by_id(id, self.endnotes.e)

    def get_num_style(self, numid, level):
        """Return the ``NumStyle`` for numbering ``numid`` at ``level``.

        Exactly one ``w:lvl`` must match, otherwise the unpacking below
        raises ``ValueError``.
        """
        root = self.numbering.e
        abstract_ref = root.find(self.A_NUMID_XPATH_TEMPL % numid)
        abstract_num_id = abstract_ref.attrib[ns.w('val')]
        [lvl] = self.numbering.e.iterfind(
            self.LVL_XPATH_TEMPL % (abstract_num_id, level))
        return NumStyle(numFmt=val(lvl, ns.w('numFmt')),
                        lvlText=val(lvl, ns.w('lvlText')))

    def get_or_add_extn(self, mime_type):
        """Return the file extension registered for ``mime_type``.

        If ``[Content_Types].xml`` has exactly one ``Default`` entry for the
        mime type its extension is returned.  Otherwise an extension is
        guessed, a new ``Default`` entry is inserted at the front of the
        content types, the part is written back and the guess returned.
        """
        path = '[Content_Types].xml'
        ctns = 'http://schemas.openxmlformats.org/package/2006/content-types'
        nsmap = {'ct': ctns}
        e = to_etree(self.z.get(path))
        known = e.xpath(
            './ct:Default[@ContentType="%s"]/@Extension' % mime_type,
            namespaces=nsmap)
        if len(known) == 1:
            return known[0]
        extn = guess_extension(mime_type)
        clashes = e.xpath('./ct:Default[@Extension="%s"]' % extn,
                          namespaces=nsmap)
        # XXX(ash): handle this case more gracefully
        assert len(clashes) == 0
        entry = ('Default', {'ContentType': mime_type, 'Extension': extn}, [])
        e[:0] = [tup2etree(entry, {None: ctns})]
        self.z[path] = etree2s(e)
        return extn

    def add_image(self, f, mime_type):
        """Store the image read from ``f`` and return its new relationship id.

        The image is written under a fresh ``media/image%d`` name and an
        image relationship pointing at it is added to the document part.
        """
        extn = self.get_or_add_extn(mime_type)
        data = f.read()
        taken = set(self.z.keys())
        p = fresh_name(taken, MAGIC_WORD + '/media/image%d.' + extn)
        self.z[p] = data
        # XXX(ash): why do we have to relativize the targets
        rel_p = p.split('/', 1)[1]
        rid = fresh_name(set(self.document.rels), 'rId%d')
        rel_attrs = {'Target': rel_p, 'Type': IMAGE_REL_URI, 'Id': rid}
        self.document.rels[rid] = tup2etree(('Relationship', rel_attrs, []))
        return rid

    def get_images(self):
        """Map each image relationship id of the document part to a
        ``StringIO`` over the image's stored contents."""
        # XXX(ash): what about the rels of the other parts...
        targets = {}
        for rid, rel in self.document.rels.iteritems():
            if rel.attrib['Type'] == IMAGE_REL_URI:
                targets[rid] = rel.attrib['Target']
        return {rid: StringIO(self.z[MAGIC_WORD + '/' + fn])
                for rid, fn in targets.items()}

    def get_rels_for(self, part):
        """Return the relationships of the part stored as attribute ``part``."""
        owner = getattr(self, part)
        return owner.rels

    def save(self, f):
        """Write the package to ``f`` as a zip.

        Parts with a parsed tree, and non-empty relationships, are
        re-serialized; every other zip member is copied unchanged.
        """
        replacements = {}
        for name in ('document', 'numbering', 'footnotes', 'endnotes'):
            part = getattr(self, name)
            if part.e is not None:
                replacements[part.path] = etree2s(part.e, decl=True)
            if part.rels:
                replacements[part.rels_path] = rels2s(part.rels)
        with ZipFile(f, 'w') as outz:
            for filename, contents in self.z.items():
                replacement = replacements.get(filename, None)
                # A falsy (e.g. empty) replacement falls back to the original.
                payload = replacement if replacement else contents
                outz.writestr(filename, payload)
def _get_by_id(id, xs):  # pylint: disable=W0622
    """Return the first item of ``xs`` whose ``w:id`` attribute equals ``id``.

    Raises ``StopIteration`` when no item matches and ``KeyError`` when an
    item visited before the match has no ``w:id`` attribute.
    """
    id_attr = ns.w('id')
    matches = (x for x in xs if id == x.attrib[id_attr])
    return next(matches)
def cell_bg(tc):
    """Return background attrs for table cell ``tc``, or ``{}`` if none.

    Only a properties element in first position is consulted; its shading
    fill (``w:shd/@w:fill``), when non-empty, is passed to ``add_bg``
    prefixed with ``#``.
    """
    props = tc[0]
    if props.tag != TABLE_COLUMN_PROPERTIES_TAG:
        return {}
    fill = val(props, ns.w('shd'), ns.w('fill'))
    if not fill:
        return {}
    # NOTE(review): the fill is used verbatim; confirm add_bg copes with
    # non-hex values should the source document contain any.
    return add_bg({}, '#' + fill)
class Docx(object):
    """Parser for a docx file, built on ``docxlite.Document``.

    ``parse`` converts the document body into a list of elements built with
    ``mkel``; ``insert_meta`` / ``strip_meta`` / ``save_to`` support writing
    metadata paragraphs back into the document.
    """

    # get "normal" inline images (i.e. ignores VML and similar crap)
    IMAGE_XPATH = (
        './*[self::wp:inline|self::wp:anchor]'
        '[.//a:graphicData'
        '[@uri="http://schemas.openxmlformats.org/drawingml/2006/picture"]]')
    # w:jc value -> class name put on the paragraph
    JC_TO_CLASS = {
        'left': 'left',
        'right': 'right',
        'center': 'center',
        'both': 'justify',
    }
    # run property tag -> html tag (first letter of the property name);
    # ordered, because the order decides the nesting of the wrappers
    STYLE_TO_HTML = OrderedDict(
        (ns.w(x), x[0]) for x in ['u', 'b', 'bCs', 'i', 'iCs', 'strike'])
    # w:highlight value -> rgb hex; 'none' maps to None (highlighting off)
    HIGHLIGHT_TO_RGB = {
        'black': '000000',
        'blue': '0000ff',
        'cyan': '00ffff',
        'darkBlue': '000080',
        'darkCyan': '008080',
        'darkGray': '808080',
        'darkGreen': '008000',
        'darkMagenta': '800080',
        'darkRed': '800000',
        'darkYellow': '808000',
        'green': '00ff00',
        'lightGray': 'c0c0c0',
        'magenta': 'ff00ff',
        'red': 'ff0000',
        'white': 'ffffff',
        'yellow': 'ffff00',
        'none': None,
    }

    def __init__(self, infilename, make_transclusions):
        """Open ``infilename`` and cache the pieces the parser needs.

        ``make_transclusions``, when truthy, is called with the document's
        images and its result kept in ``self.transclusions``; otherwise
        ``self.transclusions`` is ``None``.
        """
        self.doc = doc = docxlite.Document(infilename)
        # pylint: disable=W0212
        self.numbering = doc.numbering
        self.body = doc.document.e.find(ns.w('body'))
        self.rels = doc.document.rels
        # The section properties are expected as the last child of the body.
        sprops = docxlite.parse_sectPr(self.body[-1])
        self.textwidth_emu = (sprops.page_width.emu.real -
                              sprops.right_margin.emu.real -
                              sprops.left_margin.emu.real)
        if make_transclusions:
            self.transclusions = make_transclusions(self.doc.get_images())
        else:
            self.transclusions = None
        # Width of one indent step, used by handle_p to quantize indents.
        self.default_indent_twips = 720

    def parse(self):
        """Return ``(parsed body, transclusions)`` for the whole document."""
        return (self.parse_body(self.body, current_part='document'),
                self.transclusions)

    def handle_omath(self, e):  # pylint: disable=W0613
        """Math is not converted; always returns ``[]``."""
        return []

    def handle_p_content(self, e, current_part):
        """Convert one child element of a paragraph into a list of nodes.

        Runs, hyperlinks, bookmarks and ``oMath`` are handled; anything
        else is logged and dropped.  ``current_part`` names the part whose
        relationships resolve hyperlink targets given by relationship id.
        """
        if e.tag == RUN_TAG:
            return self.handle_run(e)
        elif e.tag == HYPERLINK_TAG:
            internalId = e.attrib.get(ns.r('id'))
            if internalId is None:
                ref = '#' + e.attrib[ns.w('anchor')]
            else:
                rels = self.doc.get_rels_for(current_part)
                ref = rels[internalId].attrib['Target']
            # 'u', 'span' = nuke bogus color and underline
            # styling that google docs likes to add to links;
            # XXX(alexander): rewrite colour less bluntly;
            # this also nukes background color
            handle_p = partial(self.handle_p_content,
                               current_part=current_part)
            body = whack(('u', 'span').__contains__, flatmap(handle_p, e))
            if not body:
                log.warn('hyperlink with no body to: %r', ref)
            return [mkel('a', {'href': ref}, body)]
        elif e.tag == BOOKMARK_END_TAG:
            return []
        elif e.tag == BOOKMARK_START_TAG:
            return [mkel('a', {'name': e.attrib[ns.w('name')]}, [])]
        elif e.tag == ns.m('oMath'):
            return self.handle_omath(e)
        else:
            log.warn('Ignoring unknown tag %s', e.tag)
            return []

    def transclude(self, pic):
        """Return a one-element list with the figure for drawing ``pic``.

        Returns ``[]`` when transclusions are disabled or when ``pic`` does
        not reference exactly one embedded image.  The figure's relative
        width is the picture extent divided by the text width.
        """
        # for id:
        # pylint: disable=W0622
        if self.transclusions is None:
            return []
        width_emu = float(val(pic, ns.wp('extent'), 'cx'))
        embeds = pic.xpath('.//a:blip/@r:embed', namespaces=ns.dict)
        try:
            id, = embeds
        except ValueError:
            log.warn('Expected exactly one r:embed with an image id, got %r',
                     embeds)
            return []
        href = self.transclusions.normalize_known_transclusion(id)
        return [
            make_figure(relwidth=width_emu / self.textwidth_emu,
                        # local tag name: 'anchor' or 'inline'
                        inline={
                            'anchor': False,
                            'inline': True
                        }[pic.tag.split('}')[1]],
                        body=[mkel('img', {'src': href}, [])],
                        src=href,
                        original_href=id)
        ]

    def make_footnote(self, e):
        """Build a ``.footnote`` element from a footnote/endnote reference.

        The note is looked up by the reference's ``w:id`` and each of its
        paragraphs converted with ``handle_p``.  The part name passed along
        follows the kind of note, so hyperlinks inside an endnote are
        resolved against the endnotes relationships.
        """
        note_id = e.attrib[ns.w('id')]
        if e.tag == FOOTNOTE_REFERENCE_TAG:
            get_note, part = self.doc.get_footnote, 'footnotes'
        else:
            # Endnotes are a separate part with their own relationships.
            get_note, part = self.doc.get_endnote, 'endnotes'
        ps = get_note(note_id).iterfind(P_TAG)
        return mkel('.footnote', {},
                    [self.handle_p(p, current_part=part) for p in ps])

    def handle_run(self, r):
        """Convert run element ``r`` into a list of strings and elements.

        When run properties exist they are applied with ``apply_rpr``,
        unless the result compares equal to the footnote pattern below.
        """
        # XXX(ash): pylint is right about this being too complex
        # pylint: disable=R0912
        _ = Var('_')
        ans = []
        rPr = first_of_tag(r, RUN_PROPS_TAG)
        content = rPr.itersiblings() if rPr is not None else iter(r)
        for e in content:
            # pylint: disable=W0622
            type = e.attrib.get(ns.w('type'))
            if e.tag == TEXT_TAG:
                ans.append(e.text)
            elif e.tag == TAB_TAG:
                # XXX(alexander): this can also work like a '_' or '…' \dotfill
                ans.append('\t')
            elif e.tag in (FOOTNOTE_REF_TAG, ENDNOTE_REF_TAG):
                # XXX(ash): what is going on here
                pass
            elif e.tag == BREAK_TAG and type in ('page', 'column'):
                ans.append(mkel('.pagebreak', {}, []))
            elif e.tag == BREAK_TAG or e.tag == CR_TAG:
                assert (type is None) or (type == 'textWrapping')
                ans.append(mkel('br', {}, []))
            # FIXME, tags below untested
            elif e.tag == SOFT_HYPHEN_TAG:
                ans.append(SOFT_HYPHEN)
            elif e.tag == NON_BREAKING_HYPHEN_TAG:
                ans.append(NON_BREAKING_HYPHEN)
            elif e.tag == ns.w('drawing'):
                ans.extend(
                    flatmap(self.transclude,
                            e.xpath(self.IMAGE_XPATH, namespaces=ns.dict)))
            elif e.tag in (FOOTNOTE_REFERENCE_TAG, ENDNOTE_REFERENCE_TAG):
                ans.append(self.make_footnote(e))
            else:
                # movie,
                # rt, ruby, rubyAlign etc. for ruby stuff
                # sym, with special handling for wingdings I guess...
                log.warn('Unknown tag %r', e.tag)
        if rPr is not None and ans != Seq[Seq['.footnote', _:], _:]:
            ans = self.apply_rpr(rPr, ans)
        return ans

    def apply_rpr(self, rPr, ans):
        """Wrap run content ``ans`` according to run properties ``rPr``.

        Wrappers are applied from the inside out: simple styles, colour,
        background (highlight, else shading fill), vertical alignment and
        finally ``code`` for code fonts.
        """
        stys = {x.tag for x in rPr.iterchildren(*self.STYLE_TO_HTML)}
        if stys:
            for (t, html) in self.STYLE_TO_HTML.iteritems():
                if t in stys:
                    ans = [mkel(html, {}, ans)]
        color = val(rPr, ns.w('color'))
        if color:
            a = add_style({}, 'color', '#' + color)
            ans = [mkel('span', a, ans)]
        # FIXME word colors
        # `None` here == turn highlighting off; it's different from no value
        highlight = self.HIGHLIGHT_TO_RGB.get(val(rPr, ns.w('highlight')),
                                              False)
        if highlight is False:
            # higher precedence than shade
            highlight = val(rPr, ns.w('shd'), ns.w('fill'))
        if highlight:
            ans = [mkel('span', add_bg({}, '#' + highlight), ans)]
        vertalign = val(rPr, ns.w('vertAlign'))
        if vertalign and vertalign != 'baseline':
            # The first three letters of the value are used as the tag name.
            ans = [mkel(vertalign[:3], {}, ans)]
        if is_code_font(val(rPr, ns.w('rFonts'), ns.w('ascii'))):
            ans = [mkel('code', {}, ans)]
        return ans

    def handle_p(self, e, current_part, in_list=False):
        """Convert paragraph element ``e`` into an output element.

        Outside of lists, a non-zero left indent (in units of
        ``self.default_indent_twips``, rounded) wraps the result in a
        ``.block`` element carrying that indent.
        """
        attrs = {}
        pPr = first_of_tag(e, P_PROPS_TAG)
        jc_class = self.JC_TO_CLASS.get(val(pPr, ns.w('jc')))
        if jc_class:
            attrs = add_class(attrs, jc_class)
        tag = style_to_tag(val(pPr, ns.w('pStyle')) or '')
        content = iter(e) if pPr is None else pPr.itersiblings()
        handle_p = partial(self.handle_p_content, current_part=current_part)
        ans = mkel(tag, attrs, flatmap(handle_p, content))
        left_indent = val(pPr, ns.w('ind'), ns.w('left')) or 0.0
        indent = int(round(float(left_indent) / self.default_indent_twips))
        if (not in_list) and indent:
            ans = lift_code(ans)
            ans = mkel('.block', {'indent': indent}, [ans])
            ans = hacky_flatten_block(ans)
        return ans

    def parse_body(self, xml, current_part):
        """Convert the block-level elements of ``xml`` into a list.

        Paragraphs go through a ``ListBuilder`` so that list items can be
        grouped; any other element first flushes the builder.  Tables are
        converted, section properties skipped, the rest logged.
        """
        builder = ListBuilder(self.doc)
        body = []
        for e in xml:
            if e.tag == P_TAG:
                handle_p = partial(self.handle_p, current_part=current_part)
                body.extend(builder.process(e, handle_p))
            else:
                body.extend(builder.flush())
                if e.tag == TABLE_TAG:
                    body.append(self.parse_table(e, current_part))
                elif e.tag == SECTION_PROPERTIES_TAG:
                    pass
                else:
                    log.warn('Unrecognized element: %s', e.tag)
        # Emit whatever list is still pending at the end of the input.
        body.extend(builder.flush())
        return body

    def parse_table(self, e, current_part):
        """Convert table element ``e`` into a ``table`` element.

        Grid column widths become ``col`` elements with percentage widths;
        cells become ``th`` or ``td`` depending on ``w:tblLook`` and their
        position, with content converted by ``parse_body``.
        """
        # XXX(ash): simplify
        # pylint: disable=R0914
        def cell_bg(tc):
            # Background attrs from the cell properties' shading fill.
            if tc[0].tag == TABLE_COLUMN_PROPERTIES_TAG:
                bg = val(tc[0], ns.w('shd'), ns.w('fill'))
                if bg:
                    return add_bg({}, '#' + bg)
            return {}

        def skip_past(e, child):
            # Children of `e` without a leading `child` element.
            if e[0].tag == child:
                return e[0].itersiblings()
            return e.iterchildren()

        def parse_rows(e, has_header_row, has_header_col):
            def is_header(i, j):
                return i == 0 and has_header_row or j == 0 and has_header_col

            return [
                mkel('tr', {}, [
                    mkel(
                        'th' if is_header(i, j) else 'td', cell_bg(tc),
                        self.parse_body(skip_past(tc,
                                                  TABLE_COLUMN_PROPERTIES_TAG),
                                        current_part=current_part))
                    for (j, tc) in enumerate(tr.iterfind(TABLE_COLUMN_TAG))
                ]) for (i, tr) in enumerate(e.iterfind(TABLE_ROW_TAG))
            ]

        tblPr = first_of_tag(e, ns.w('tblPr'))
        tbl_stuff = tblPr.itersiblings()
        tblGrid = next(tbl_stuff)
        # according to the schema this is always true
        assert tblGrid.tag == ns.w('tblGrid'), tblGrid.tag
        look = tblPr.find(ns.w('tblLook'))
        if look is None:
            has_header_row = has_header_col = False
        else:
            # this is actually the canonical check;
            # the identical per cell/row props are just for caching
            has_header_row, has_header_col = (look.attrib.get(k) == "1"
                                              for k in (ns.w('firstRow'),
                                                        ns.w('firstColumn')))
        grid_cols = tblGrid.iterchildren(ns.w('gridCol'))
        col_widths = [int(gc.attrib[ns.w('w')]) for gc in grid_cols]
        col_total = sum(col_widths)
        col_pcts = [100. * w / col_total for w in col_widths]
        cols = [
            mkel('col', add_style({}, 'width', '%s%%' % w), [])
            for w in col_pcts
        ]
        rows = parse_rows(e, has_header_row, has_header_col)
        table = odt_parser.parse_table_body(cols + rows)
        return mkel('table', {}, table)

    def strip_meta(self, unaugmented_meta, transclusions, asides):
        """Remove the leading body elements that make up the metadata.

        Ever longer prefixes of the body are parsed and postprocessed until
        the resulting metadata equals ``unaugmented_meta``; that prefix is
        then deleted from the body.  Raises ``Exception`` when no prefix
        matches.
        """
        # XXX(ash): :(
        from converter.postprocess import postprocess
        for i in range(len(self.body)):
            raw_body_i = self.parse_body(self.body[:i],
                                         current_part='document')
            unaugmented_meta_i = postprocess(raw_body_i,
                                             transclusions,
                                             asides=asides)[0]
            if unaugmented_meta_i == unaugmented_meta:
                self.body[:i] = []
                return
        raise Exception('failed to find the end of the metadata')

    @staticmethod
    def meta_to_docx(meta, intern_image, total_w):
        """Return a list of paragraph elements representing ``meta``.

        Title and subtitle (when present and truthy) come first, styled
        with the paragraph styles ``Title`` / ``Subtitle``; every remaining
        item becomes a paragraph of the form ``<u>key:</u> value``.
        """
        tups = []
        meta_copy = meta.raw_items().copy()
        to_runs = partial(meta_to_runs,
                          intern_image=intern_image,
                          total_w=total_w)
        for name in ['Title', 'Subtitle']:
            bit = meta_copy.pop(name.lower(), None)
            if bit:
                pr = mkel(
                    'w:pPr',
                    {},
                    [
                        # FIXME(ash): currently we don't ensure the styles exist
                        mkel('w:pStyle', {'w:val': name}, [])
                    ])
                tups.append(make_p(pr, *to_runs(bit)))
        for k, v in meta_copy.iteritems():
            body = (to_runs([mkel('u', {}, [str(k) + ':']), ' ']) +
                    to_runs(v))
            tups.append(make_p(*body))
        return [tup2etree(tup, nsmap=ns.dict) for tup in tups]

    def intern_image(self, image):
        """Add ``image`` to the document and return its relationship id."""
        assert isinstance(image, literal.Image)
        f = StringIO(image.data)
        rid = self.doc.add_image(f, image.mimetype)
        return rid

    def insert_meta(self, meta):
        """Insert the paragraphs for ``meta`` at the start of the body."""
        self.body[:0] = self.meta_to_docx(meta, self.intern_image,
                                          self.textwidth_emu)

    def save_to(self, f):
        """Save the underlying document to ``f``."""
        self.doc.save(f)