def handle_p_content(self, e, current_part):
    """Dispatch one child of a <w:p> element to a list of output nodes."""
    if e.tag == RUN_TAG:
        return self.handle_run(e)
    elif e.tag == HYPERLINK_TAG:
        internalId = e.attrib.get(ns.r('id'))
        if internalId is None:
            # no relationship id -> internal anchor link
            ref = '#' + e.attrib[ns.w('anchor')]
        else:
            rels = self.doc.get_rels_for(current_part)
            ref = rels[internalId].attrib['Target']
        # 'u', 'span' = nuke bogus color and underline
        # styling that google docs likes to add to links;
        # XXX(alexander): rewrite colour less bluntly;
        # this also nukes background color
        handle_p = partial(self.handle_p_content, current_part=current_part)
        body = whack(('u', 'span').__contains__, flatmap(handle_p, e))
        if not body:
            log.warn('hyperlink with no body to: %r', ref)
        return [mkel('a', {'href': ref}, body)]
    elif e.tag == BOOKMARK_END_TAG:
        return []
    elif e.tag == BOOKMARK_START_TAG:
        return [mkel('a', {'name': e.attrib[ns.w('name')]}, [])]
    elif e.tag == ns.m('oMath'):
        return self.handle_omath(e)
    else:
        log.warn('Ignoring unknown tag %s', e.tag)
        return []
def handle_p_content(self, e, current_part):
    """Convert a single paragraph child element into output nodes."""
    if e.tag == RUN_TAG:
        return self.handle_run(e)
    elif e.tag == HYPERLINK_TAG:
        internalId = e.attrib.get(ns.r('id'))
        if internalId is None:
            ref = '#' + e.attrib[ns.w('anchor')]
        else:
            rels = self.doc.get_rels_for(current_part)
            ref = rels[internalId].attrib['Target']
        # 'u', 'span' = nuke bogus color and underline
        # styling that google docs likes to add to links;
        # XXX(alexander): rewrite colour less bluntly;
        # this also nukes background color
        handle_p = partial(self.handle_p_content,
                           current_part=current_part)
        body = whack(('u', 'span').__contains__, flatmap(handle_p, e))
        if not body:
            log.warn('hyperlink with no body to: %r', ref)
        return [mkel('a', {'href': ref}, body)]
    elif e.tag == BOOKMARK_END_TAG:
        return []
    elif e.tag == BOOKMARK_START_TAG:
        return [mkel('a', {'name': e.attrib[ns.w('name')]}, [])]
    elif e.tag == ns.m('oMath'):
        return self.handle_omath(e)
    else:
        log.warn('Ignoring unknown tag %s', e.tag)
        return []
def gen_journal(entry):
    """Yield a '.journal' node for *entry* when journal metadata exists."""
    journal = []
    try:
        journal.append(mkel('.journal-title',
                            item_attributes('isPartOf', 'Periodical',
                                            'itemscope'),
                            [entry.fields['journal']]))
    except KeyError:
        pass
    try:
        journal.append(mkel('.volume', item_attributes('volumeNumber'),
                            [entry.fields['volume']]))
        journal.append(mkel('.number', item_attributes('issueNumber'),
                            ['(', entry.fields['number'], ')']))
    except KeyError:
        pass
    if journal:
        yield mkel('.journal',
                   item_attributes('isPartOf', 'PublicationVolume',
                                   'itemscope'),
                   list(intersperse(' ', journal)))
def gen_journal(entry):
    """Yield a '.journal' element if the entry carries journal fields."""
    journal = []
    try:
        journal.append(
            mkel('.journal-title',
                 item_attributes('isPartOf', 'Periodical', 'itemscope'),
                 [entry.fields['journal']]))
    except KeyError:
        pass
    try:
        journal.append(
            mkel('.volume', item_attributes('volumeNumber'),
                 [entry.fields['volume']]))
        journal.append(
            mkel('.number', item_attributes('issueNumber'),
                 ['(', entry.fields['number'], ')']))
    except KeyError:
        pass
    if journal:
        yield mkel(
            '.journal',
            item_attributes('isPartOf', 'PublicationVolume', 'itemscope'),
            list(intersperse(' ', journal)))
def meta_to_runs(what, intern_image, total_w):  # pylint: disable=R0911
    """Recursively convert a meta value into a list of w:r run tuples."""
    recurse = partial(meta_to_runs, intern_image=intern_image,
                      total_w=total_w)
    if isinstance(what, basestring):
        return [mkel('w:r', {}, [mk_t(what)])]
    elif isinstance(what, list):
        return flatmap(recurse, what)
    elif isinstance(what, tuple):
        t, _, b = what
        runs = recurse(b)
        if t in ('b', 'i', 's', 'u'):
            return [apply_html_style(t, run) for run in runs]
        else:
            log.warn("Didn't understand html tag %r", what)
            return runs
    elif isinstance(what, literal.Image):
        rid = intern_image(what)
        target_w = parse_percentage(what.style['width']) * total_w
        w, h = what.get_size()
        w, h = [docxlite.Emu(x * target_w / h) for x in (w, h)]
        inline = (what.style['display'] == 'inline')
        return [mkel('w:r', {}, [make_pic(rid, w, h, inline)])]
    elif isinstance(what, literal.Bibliography):
        return recurse(what.data)
    else:
        log.warn('Fallthrough: %r', what)
        return recurse(unparse_literal(what))
def meta_to_runs(what, intern_image, total_w):  # pylint: disable=R0911
    """Turn a (possibly nested) meta value into a flat list of w:r runs."""
    recurse = partial(
        meta_to_runs, intern_image=intern_image, total_w=total_w)
    if isinstance(what, basestring):
        return [mkel('w:r', {}, [mk_t(what)])]
    elif isinstance(what, list):
        return flatmap(recurse, what)
    elif isinstance(what, tuple):
        t, _, b = what
        runs = recurse(b)
        if t in ('b', 'i', 's', 'u'):
            return [apply_html_style(t, run) for run in runs]
        else:
            log.warn("Didn't understand html tag %r", what)
            return runs
    elif isinstance(what, literal.Image):
        rid = intern_image(what)
        target_w = parse_percentage(what.style['width']) * total_w
        w, h = what.get_size()
        w, h = [docxlite.Emu(x * target_w / h) for x in (w, h)]
        inline = (what.style['display'] == 'inline')
        return [mkel('w:r', {}, [make_pic(rid, w, h, inline)])]
    elif isinstance(what, literal.Bibliography):
        return recurse(what.data)
    else:
        log.warn('Fallthrough: %r', what)
        return recurse(unparse_literal(what))
def _coalesce_blocks(attrs, blocks):
    """Yield 'pre' and 'blockquote' elements coalesced from *blocks*."""
    B = Var('B')
    _ = Var('_')
    blocks = list(blocks)
    _debug = blocks[:]

    def next_body():
        return blocks.pop(0)[2] if blocks else []

    while True:
        body = next_body()
        if not body:
            break
        # consume a run of bare code blocks into one <pre>
        pre_block = []
        while body and body == [('code', {}, B)]:
            pre_block.append(plaintextify(B.val) + '\n')
            body = next_body()
        if pre_block:
            pre_block = mkel('pre', {}, pre_block)
            yield pre_block
        # consume a run of non-code blocks into one <blockquote>
        non_pre_block = []
        while body and body != [('code', {}, B)]:
            is_citation = 'right' in attrs.get('class', [])
            if is_citation:
                non_pre_block.append(
                    mkel('footer', {}, [mkel('cite', {}, body)]))
            else:
                if needs_wrapping_in_p(body):
                    body = [mkel('p', {}, body)]
                non_pre_block.extend(body)
            body = next_body()
        if non_pre_block:
            yield mkel('blockquote', {}, tidy(non_pre_block))
def meta_to_docx(meta, intern_image, total_w):
    """Render document meta data as a list of docx paragraph etrees."""
    tups = []
    meta_copy = meta.raw_items().copy()
    to_runs = partial(meta_to_runs, intern_image=intern_image,
                      total_w=total_w)
    for name in ['Title', 'Subtitle']:
        bit = meta_copy.pop(name.lower(), None)
        if bit:
            pr = mkel('w:pPr', {}, [
                # FIXME(ash): currently we don't ensure the styles exist
                mkel('w:pStyle', {'w:val': name}, [])
            ])
            tups.append(make_p(pr, *to_runs(bit)))
    for k, v in meta_copy.iteritems():
        body = (to_runs([mkel('u', {}, [str(k) + ':']), ' ']) + to_runs(v))
        tups.append(make_p(*body))
    return [tup2etree(tup, nsmap=ns.dict) for tup in tups]
def _coalesce_blocks(attrs, blocks):
    """Group adjacent code bodies into <pre> and the rest into blockquotes."""
    B = Var('B')
    _ = Var('_')
    blocks = list(blocks)
    _debug = blocks[:]

    def next_body():
        return blocks.pop(0)[2] if blocks else []

    while True:
        body = next_body()
        if not body:
            break
        pre_block = []
        while body and body == [('code', {}, B)]:
            pre_block.append(plaintextify(B.val) + '\n')
            body = next_body()
        if pre_block:
            pre_block = mkel('pre', {}, pre_block)
            yield pre_block
        non_pre_block = []
        while body and body != [('code', {}, B)]:
            is_citation = 'right' in attrs.get('class', [])
            if is_citation:
                non_pre_block.append(
                    mkel('footer', {}, [mkel('cite', {}, body)]))
            else:
                if needs_wrapping_in_p(body):
                    body = [mkel('p', {}, body)]
                non_pre_block.extend(body)
            body = next_body()
        if non_pre_block:
            yield mkel('blockquote', {}, tidy(non_pre_block))
def handle_emphasis(self, emph, body):
    r"""Boldens italicizes or strikes-through latex text.

    Harder than it sounds: The problem being that \textbf and \textit
    don't work across paragraphs and \bfseries and \itshape don't do
    italic correction (i.e. the end of the emphasized text juts into
    what follows it, because the space is not widened as necessary).

    >>> writer = LatexWriter()
    >>> print writer.handle_emphasis('b', ['some bold text'])
    \textbf{some bold text}
    >>> print writer.handle_emphasis(
    ...     'b', [('p', {}, [('i', {}, ['some bold italic'])]), 'text'])
    {\bfseries{}\textit{some bold italic}
    <BLANKLINE>
    text\/}
    >>>

    With strikethrough and underline the problem is even worse. TeX itself
    has no underline/strikethrough at all and the default LaTeX \underline
    command is broken (e.g. makes the text un(line)breakable). All
    replacements like soul's \ul and ulem's \uline have weird limitations
    that cause random breakage, so we push these styles down into the body
    recursively.

    >>> print writer.handle_emphasis(
    ...     'u', [('p', {}, [('i', {},
    ...                       [('b', {}, ['ul bold italic'])])]), 'text'])
    {\itshape{}{\bfseries{}\uline{ul bold italic}\/}\/}
    <BLANKLINE>
    \uline{text}
    """
    # can safely use \textit/\textbf etc.
    INLINE_TEXT = Var(
        'INLINE_TEXT',  # pylint: disable=C0103
        lambda x: isinstance(x, basestring) and '\n' not in x)
    if body == [INLINE_TEXT]:
        return cmd(self.INLINE_EMPH_TO_LATEX[emph], [],
                   [self.latexify(body)])
    else:
        if emph in ('b', 'i'):
            # need to use itshape/bfseries and do italic correction (r'\/')
            return texcmd(dict(b='bfseries', i='itshape')[emph],
                          join(self.latexify(body), r'\/'))
        else:
            assert emph in ('u', 's')
            # XXX: it might be better to have latexify as the outmost call
            # here rather than join indivudally converted parts. That would
            # allow for further rewrite logic in other parts of the latex
            # converter.
            return join(*(self.handle_emphasis(emph, [e])
                          if isinstance(e, basestring)
                          else self.latexify(
                              mkel(*e[:2],
                                   body=[mkel(emph, {}, [subbody_part])
                                         for subbody_part in e[2]]))
                          for e in body))
def ensec(heading, section, kill_anchor, gensym):
    """Wrap *heading* plus *section* body in a <section> element."""
    h, attr, body = heading
    assert h in H_TAGS
    # reasons for lifting the anchor id to the section include:
    # - epub seems to require sections to have id's
    # - endnotify expects sections to have id's
    attr, body = lift_anchor_id(attr, body, gensym, kill_anchor)
    return mkel('section', attr, [mkel(h, {}, body)] + section)
def ensec(heading, section, kill_anchor, gensym):
    """Build a <section> from a heading tuple and its body elements."""
    h, attr, body = heading
    assert h in H_TAGS
    # reasons for lifting the anchor id to the section include:
    # - epub seems to require sections to have id's
    # - endnotify expects sections to have id's
    attr, body = lift_anchor_id(attr, body, gensym, kill_anchor)
    return mkel('section', attr,
                [mkel(h, {}, body)] + section)
def handle_emphasis(self, emph, body):
    r"""Boldens italicizes or strikes-through latex text.

    Harder than it sounds: The problem being that \textbf and \textit
    don't work across paragraphs and \bfseries and \itshape don't do
    italic correction (i.e. the end of the emphasized text juts into
    what follows it, because the space is not widened as necessary).

    >>> writer = LatexWriter()
    >>> print writer.handle_emphasis('b', ['some bold text'])
    \textbf{some bold text}
    >>> print writer.handle_emphasis(
    ...     'b', [('p', {}, [('i', {}, ['some bold italic'])]), 'text'])
    {\bfseries{}\textit{some bold italic}
    <BLANKLINE>
    text\/}
    >>>

    With strikethrough and underline the problem is even worse. TeX itself
    has no underline/strikethrough at all and the default LaTeX \underline
    command is broken (e.g. makes the text un(line)breakable). All
    replacements like soul's \ul and ulem's \uline have weird limitations
    that cause random breakage, so we push these styles down into the body
    recursively.

    >>> print writer.handle_emphasis(
    ...     'u', [('p', {}, [('i', {},
    ...                       [('b', {}, ['ul bold italic'])])]), 'text'])
    {\itshape{}{\bfseries{}\uline{ul bold italic}\/}\/}
    <BLANKLINE>
    \uline{text}
    """
    # can safely use \textit/\textbf etc.
    INLINE_TEXT = Var('INLINE_TEXT',  # pylint: disable=C0103
                      lambda x: isinstance(x, basestring) and '\n' not in x)
    if body == [INLINE_TEXT]:
        return cmd(self.INLINE_EMPH_TO_LATEX[emph], [],
                   [self.latexify(body)])
    else:
        if emph in ('b', 'i'):
            # need to use itshape/bfseries and do italic correction (r'\/')
            return texcmd(dict(b='bfseries', i='itshape')[emph],
                          join(self.latexify(body), r'\/'))
        else:
            assert emph in ('u', 's')
            # XXX: it might be better to have latexify as the outmost call
            # here rather than join indivudally converted parts. That would
            # allow for further rewrite logic in other parts of the latex
            # converter.
            return join(*(
                self.handle_emphasis(emph, [e])
                if isinstance(e, basestring)
                else self.latexify(
                    mkel(*e[:2],
                         body=[mkel(emph, {}, [subbody_part])
                               for subbody_part in e[2]]))
                for e in body))
def parse_table(self, e, current_part):
    """Convert a docx <w:tbl> element into a 'table' element tuple."""
    # XXX(ash): simplify
    # pylint: disable=R0914
    def cell_bg(tc):
        # background fill from the cell properties, if present
        if tc[0].tag == TABLE_COLUMN_PROPERTIES_TAG:
            bg = val(tc[0], ns.w('shd'), ns.w('fill'))
            if bg:
                return add_bg({}, '#' + bg)
        return {}

    def skip_past(e, child):
        if e[0].tag == child:
            return e[0].itersiblings()
        return e.iterchildren()

    def parse_rows(e, has_header_row, has_header_col):
        def is_header(i, j):
            return i == 0 and has_header_row or j == 0 and has_header_col
        return [mkel('tr', {},
                     [mkel('th' if is_header(i, j) else 'td',
                           cell_bg(tc),
                           self.parse_body(
                               skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG),
                               current_part=current_part))
                      for (j, tc)
                      in enumerate(tr.iterfind(TABLE_COLUMN_TAG))])
                for (i, tr) in enumerate(e.iterfind(TABLE_ROW_TAG))]

    tblPr = first_of_tag(e, ns.w('tblPr'))
    tbl_stuff = tblPr.itersiblings()
    tblGrid = next(tbl_stuff)
    # according to the schema this is always true
    assert tblGrid.tag == ns.w('tblGrid'), tblGrid.tag
    look = tblPr.find(ns.w('tblLook'))
    if look is None:
        has_header_row = has_header_col = False
    else:
        # this is actually the canonical check;
        # the identical per cell/row props are just for caching
        has_header_row, has_header_col = (
            look.attrib.get(k) == "1"
            for k in (ns.w('firstRow'), ns.w('firstColumn')))
    grid_cols = tblGrid.iterchildren(ns.w('gridCol'))
    col_widths = [int(gc.attrib[ns.w('w')]) for gc in grid_cols]
    col_total = sum(col_widths)
    col_pcts = [100. * w / col_total for w in col_widths]
    cols = [mkel('col', add_style({}, 'width', '%s%%' % w), [])
            for w in col_pcts]
    rows = parse_rows(e, has_header_row, has_header_col)
    table = odt_parser.parse_table_body(cols + rows)
    return mkel('table', {}, table)
def gen_authors(entry):
    """Yield a 'span.authors' element listing the entry's authors."""
    try:
        authors = [mkel('.author',
                        item_attributes('author', 'Person', 'itemscope'),
                        gen_name(a))
                   for a in entry.persons['author']]
    except KeyError:
        authors = [mkel('.author', {}, ['Anon'])]
    separator = mkel('.author-separator', {}, ['; '])
    yield mkel('span.authors', {}, list(intersperse(separator, authors)))
def _parse_body(xml, handle_data_url, parent_tag, footnote_state):
    """Recursively convert html etree children into element tuples."""
    # pylint: disable=R0912,R0914
    if parent_tag == 'pre':
        return [etree.tostring(xml, method="text")]
    ans = []
    xml = list(xml)
    for e in xml:
        tag = e.tag
        text = e.text or ''
        tail = e.tail or ''
        body = _parse_body(xml=e, handle_data_url=handle_data_url,
                           parent_tag=tag, footnote_state=footnote_state)
        attrs = dict(e.attrib)
        _cleanup_attrs(tag, attrs)
        if text:
            body = [text] + body
        if 'class' in attrs:
            tag = _cleanup_classes(tag, attrs)
        if 'style' in attrs:
            _cleanup_style(tag, attrs)
        if tag == 'figure':
            # put figcaption in canonical order
            _cleanup_fig(attrs, body)
        elif tag == 'img' and parent_tag != 'figure':
            # bare <img> -> wrap it in an inline <figure>
            img_attrs, img_body = attrs, body
            _de_data_url(handle_data_url, img_attrs)
            # XXX(alexander): pop the 'margin' class;
            # the display 'inline' covers that
            if 'margin' in img_attrs.get('class', []):
                img_attrs['class'].remove('margin')
                if not img_attrs['class']:
                    del img_attrs['class']
            tag, attrs = 'figure', {
                'style': OrderedDict([('display', 'inline')])}
            body = [mkel('img', img_attrs, img_body)]
            _cleanup_fig(attrs, body)
        _de_data_url(handle_data_url, attrs)
        maybe_anchorize_id(tag, attrs, body)
        footnote = _maybe_handle_footnote(tag, attrs, body, footnote_state)
        if footnote is not None:
            ans.extend(footnote)
        elif tag in ALLOWED_TAGS:
            ans.append(mkel(tag, attrs, body))
        elif tag == '.tex2jax_process':
            ans.append(mkcmd('tex', body))
        else:
            log.info('Stripping non-allowed tag %s', tag)
            ans.extend(body)
        if tail:
            ans.append(tail)
    return ans
def split_footnote(e):
    # Replace a '.footnote' element with a numbered noteref link and
    # collect its body into the enclosing scope's `endnotes` list.
    t, a, b = e
    if t != '.footnote':
        return e
    counter[0] += 1
    ordinal = [str(counter[0])]
    fid = '%s-fn%d' % (secid, counter[0]) if 'id' not in a else a['id']
    endnotes.append(mkel('aside',
                         merge_attrs(a, aside_attrs, {'id': fid}), b))
    return mkel('a', merge_attrs(a_attrs, {'href': '#' + fid}), ordinal)
def _parse_body(xml, handle_data_url, parent_tag, footnote_state):
    """Walk an html subtree and build the internal element-tuple form."""
    # pylint: disable=R0912,R0914
    if parent_tag == 'pre':
        return [etree.tostring(xml, method="text")]
    ans = []
    xml = list(xml)
    for e in xml:
        tag = e.tag
        text = e.text or ''
        tail = e.tail or ''
        body = _parse_body(xml=e,
                           handle_data_url=handle_data_url,
                           parent_tag=tag,
                           footnote_state=footnote_state)
        attrs = dict(e.attrib)
        _cleanup_attrs(tag, attrs)
        if text:
            body = [text] + body
        if 'class' in attrs:
            tag = _cleanup_classes(tag, attrs)
        if 'style' in attrs:
            _cleanup_style(tag, attrs)
        if tag == 'figure':
            # put figcaption in canonical order
            _cleanup_fig(attrs, body)
        elif tag == 'img' and parent_tag != 'figure':
            img_attrs, img_body = attrs, body
            _de_data_url(handle_data_url, img_attrs)
            # XXX(alexander): pop the 'margin' class;
            # the display 'inline' covers that
            if 'margin' in img_attrs.get('class', []):
                img_attrs['class'].remove('margin')
                if not img_attrs['class']:
                    del img_attrs['class']
            tag, attrs = 'figure', {
                'style': OrderedDict([('display', 'inline')])}
            body = [mkel('img', img_attrs, img_body)]
            _cleanup_fig(attrs, body)
        _de_data_url(handle_data_url, attrs)
        maybe_anchorize_id(tag, attrs, body)
        footnote = _maybe_handle_footnote(tag, attrs, body, footnote_state)
        if footnote is not None:
            ans.extend(footnote)
        elif tag in ALLOWED_TAGS:
            ans.append(mkel(tag, attrs, body))
        elif tag == '.tex2jax_process':
            ans.append(mkcmd('tex', body))
        else:
            log.info('Stripping non-allowed tag %s', tag)
            ans.extend(body)
        if tail:
            ans.append(tail)
    return ans
def split_footnote(e):
    # Turn a '.footnote' into an <a> noteref; stash the aside in the
    # closed-over `endnotes` accumulator.
    t, a, b = e
    if t != '.footnote':
        return e
    counter[0] += 1
    ordinal = [str(counter[0])]
    fid = '%s-fn%d' % (secid, counter[0]) if 'id' not in a else a['id']
    endnotes.append(
        mkel('aside', merge_attrs(a, aside_attrs, {'id': fid}), b))
    return mkel('a', merge_attrs(a_attrs, {'href': '#' + fid}), ordinal)
def parse_table(self, e, current_part):
    """Parse a docx table element into the internal 'table' tuple."""
    # XXX(ash): simplify
    # pylint: disable=R0914
    def cell_bg(tc):
        if tc[0].tag == TABLE_COLUMN_PROPERTIES_TAG:
            bg = val(tc[0], ns.w('shd'), ns.w('fill'))
            if bg:
                return add_bg({}, '#' + bg)
        return {}

    def skip_past(e, child):
        if e[0].tag == child:
            return e[0].itersiblings()
        return e.iterchildren()

    def parse_rows(e, has_header_row, has_header_col):
        def is_header(i, j):
            return i == 0 and has_header_row or j == 0 and has_header_col
        return [
            mkel('tr', {},
                 [mkel('th' if is_header(i, j) else 'td',
                       cell_bg(tc),
                       self.parse_body(
                           skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG),
                           current_part=current_part))
                  for (j, tc) in enumerate(tr.iterfind(TABLE_COLUMN_TAG))])
            for (i, tr) in enumerate(e.iterfind(TABLE_ROW_TAG))]

    tblPr = first_of_tag(e, ns.w('tblPr'))
    tbl_stuff = tblPr.itersiblings()
    tblGrid = next(tbl_stuff)
    # according to the schema this is always true
    assert tblGrid.tag == ns.w('tblGrid'), tblGrid.tag
    look = tblPr.find(ns.w('tblLook'))
    if look is None:
        has_header_row = has_header_col = False
    else:
        # this is actually the canonical check;
        # the identical per cell/row props are just for caching
        has_header_row, has_header_col = (
            look.attrib.get(k) == "1"
            for k in (ns.w('firstRow'), ns.w('firstColumn')))
    grid_cols = tblGrid.iterchildren(ns.w('gridCol'))
    col_widths = [int(gc.attrib[ns.w('w')]) for gc in grid_cols]
    col_total = sum(col_widths)
    col_pcts = [100. * w / col_total for w in col_widths]
    cols = [mkel('col', add_style({}, 'width', '%s%%' % w), [])
            for w in col_pcts]
    rows = parse_rows(e, has_header_row, has_header_col)
    table = odt_parser.parse_table_body(cols + rows)
    return mkel('table', {}, table)
def parse_rows(e, has_header_row, has_header_col):
    # Build a 'tr' tuple per table row; header cells become 'th'.
    def is_header(i, j):
        return i == 0 and has_header_row or j == 0 and has_header_col
    return [mkel('tr', {},
                 [mkel('th' if is_header(i, j) else 'td',
                       cell_bg(tc),
                       self.parse_body(
                           skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG),
                           current_part=current_part))
                  for (j, tc)
                  in enumerate(tr.iterfind(TABLE_COLUMN_TAG))])
            for (i, tr) in enumerate(e.iterfind(TABLE_ROW_TAG))]
def gen_authors(entry):
    """Yield the authors of *entry* as a 'span.authors' element."""
    try:
        authors = [
            mkel('.author',
                 item_attributes('author', 'Person', 'itemscope'),
                 gen_name(a))
            for a in entry.persons['author']]
    except KeyError:
        authors = [mkel('.author', {}, ['Anon'])]
    separator = mkel('.author-separator', {}, ['; '])
    yield mkel('span.authors', {},
               list(intersperse(separator, authors)))
def parse_rows(e, has_header_row, has_header_col):
    # One 'tr' per <w:tr>; first row/column may be promoted to 'th'.
    def is_header(i, j):
        return i == 0 and has_header_row or j == 0 and has_header_col
    return [
        mkel('tr', {},
             [mkel('th' if is_header(i, j) else 'td',
                   cell_bg(tc),
                   self.parse_body(
                       skip_past(tc, TABLE_COLUMN_PROPERTIES_TAG),
                       current_part=current_part))
              for (j, tc) in enumerate(tr.iterfind(TABLE_COLUMN_TAG))])
        for (i, tr) in enumerate(e.iterfind(TABLE_ROW_TAG))]
def gen_url(entry):
    """Yield an 'a.url' element when the entry has a url field."""
    attr = item_attributes('url')
    try:
        attr['href'] = entry.fields['url']
        yield mkel('a.url', attr, [entry.fields['url']])
    except KeyError:
        pass
def _maybe_handle_footnote(tag, attrs, body, footnote_state):
    """Handle footnote references and bodies, if *tag* is one of them.

    Returns a list of replacement elements when the element was a
    footnote reference ('a.noteref') or body ('aside.endnote'), and
    None when it was neither (caller then treats it normally).
    `footnote_state` maps reference ids to their (mutable) bodies so a
    later aside can overwrite the body saved for its reference.
    """
    classes = attrs.get('class', [])
    if tag == 'a' and 'noteref' in classes:
        href = attrs['href']
        if href.startswith('#'):
            # pylint: disable=W0622
            id = href[1:]
            # remember the body list; the matching aside mutates it later
            footnote_state[id] = body
            return [mkel('.footnote', {}, body)]
        else:
            # XXX(ash): make user-visible
            log.warn("Found a footnote reference but didn't understand its "
                     "href (%s), so skipping it.", href)
            return []
    if tag == 'aside' and 'endnote' in classes:
        # BUG FIX: removed leftover debug statement `print attrs, body`
        id_ = attrs['id']
        old_body = footnote_state.pop(id_, None)
        if old_body is None:
            # XXX(ash): make user-visible
            log.warn("Found a footnote body but its id (%s) doesn't match "
                     "any footnote reference seen previously, so skipping "
                     "it.", id_)
        else:
            # overwrite the body of the anchor we saved earlier
            old_body[:] = body
        return []
    return None
def gen_url(entry):
    """Yield a link element for the entry's url, if any."""
    attr = item_attributes('url')
    try:
        attr['href'] = entry.fields['url']
        yield mkel('a.url', attr,
                   [entry.fields['url']])
    except KeyError:
        pass
def _maybe_handle_footnote(tag, attrs, body, footnote_state):
    """Turn footnote refs/bodies into '.footnote' elements, or return None.

    A noteref anchor yields a '.footnote' element and records its body in
    `footnote_state`; the matching 'aside.endnote' later rewrites that
    recorded body in place and is itself dropped. Non-footnote elements
    return None so the caller processes them normally.
    """
    classes = attrs.get('class', [])
    if tag == 'a' and 'noteref' in classes:
        href = attrs['href']
        if href.startswith('#'):
            # pylint: disable=W0622
            id = href[1:]
            footnote_state[id] = body
            return [mkel('.footnote', {}, body)]
        else:
            # XXX(ash): make user-visible
            log.warn(
                "Found a footnote reference but didn't understand its "
                "href (%s), so skipping it.", href)
            return []
    if tag == 'aside' and 'endnote' in classes:
        # BUG FIX: dropped stray debug `print attrs, body` left in the code
        id_ = attrs['id']
        old_body = footnote_state.pop(id_, None)
        if old_body is None:
            # XXX(ash): make user-visible
            log.warn(
                "Found a footnote body but its id (%s) doesn't match any "
                "footnote reference seen previously, so skipping it.", id_)
        else:
            # overwrite the body of the anchor we saved earlier
            old_body[:] = body
        return []
    return None
def gen_entry(entry, key):
    """Render one bibliography entry as an 'li.ref' element tuple."""
    generators_by_type = {
        'article': [gen_authors, gen_year, gen_title, gen_journal,
                    gen_page],
        'book': [gen_authors, gen_year, gen_title, gen_publisher],
        'proceedings': [gen_authors, gen_year, gen_title, gen_publisher],
        'inbook': [gen_authors, gen_year, gen_title, gen_title,
                   gen_publisher, gen_chapter, gen_page],
        'phdthesis': [gen_authors, gen_year, gen_title,
                      gen_text('PhD diss.'), gen_school],
        'inproceedings': [gen_authors, gen_year, gen_title, gen_booktitle,
                          gen_publisher, gen_page],
        'mastersthesis': [gen_authors, gen_year, gen_title,
                          gen_text('Master diss.'), gen_school],
        'misc': [gen_authors, gen_year, gen_title, gen_url],
    }
    # BUG FIX: the default used to be the *string* 'misc', so an unknown
    # entry type iterated over the characters 'm', 'i', ... and crashed;
    # fall back to the 'misc' generator list instead.
    generators = generators_by_type.get(entry.type,
                                        generators_by_type['misc'])
    li_fields = [field for gen in generators for field in gen(entry)]
    li_fields = list(intersperse('. ', li_fields))
    li_fields.append('.')
    li_types = {
        'article': 'ScholarlyArticle',
        'book': 'Book',
        'proceedings': 'ConferenceProceedings',
        'inbook': 'BookChapter',
        'phdthesis': 'PhdThesis',
        'inproceedings': 'ConferenceProceedings',
        'mastersthesis': 'MasterThesis',
        'misc': 'Misc',
    }
    # BUG FIX: default to the 'misc' item type 'Misc', not the lowercase
    # dict key 'misc'.
    li_type = li_types.get(entry.type, li_types['misc'])
    li_attributes = item_attributes('citation', li_type, 'itemscope')
    li_attributes['id'] = _bibliography_anchor(key)
    return mkel('li.ref', li_attributes, li_fields)
def make_toc(title, lang, toc, toc_depth, titlepage=False):
    """Build the xhtml table-of-contents page as a string."""
    ns = {None: 'http://www.w3.org/1999/xhtml',
          'epub': 'http://www.idpf.org/2007/ops'}
    toc_ol_body = []
    if titlepage:
        toc_ol_body.append(
            mkel('li', {'id': 'toc-titlepage'},
                 [('a', {'href': 'titlepage.xhtml'}, [title])]))
    # FIXME(alexander): make this work for arbitrary toc-depth;
    # also don't tie to single-html file layout/name.
    assert toc_depth == 1
    chapter_toc = [h for h in toc if isinstance(h, tuple)]
    toc_ol_body.extend(
        ('li', {'class': 'toc-chapter', 'id': 'toc-chapter-%d' % i},
         [('a', {'href': 'main.xhtml#%s' % a['id']}, [h])])
        for (i, (tag, a, (h,))) in zip(count(1), chapter_toc))
    landmarks = make_landmarks(title, lang)
    return html_string_from_body(
        ('body', {},
         [('section', {'class': 'frontmatter toc',
                       'epub:type': 'frontmatter toc'},
           [('header', {}, [('h1', {}, [lang.localize('Contents')])]),
            ('nav', {'epub:type': 'toc', 'id': 'toc'},
             [('ol', {}, toc_ol_body)]),
            landmarks])]),
        title=title, nsmap=ns)
def _opf_item(href, id=None, mime=None, properties=None):
    # pylint: disable=W0622
    """Create an OPF manifest <item> tuple for *href*."""
    # default the id to the href with separators/extension normalized away
    id = id or href.split('.')[0].replace('/', '-')
    attrs = {'id': id,
             'href': href,
             'media-type': mime or mimetype_of_url(href)}
    if properties is not None:
        attrs['properties'] = properties
    return mkel('item', attrs, [])
def endnotify(body, aside_attrs, a_attrs, section_attrs):
    """Transform .footnotes to noterefs and chapter rearnotes.

    Assumes that `body` is a list of `<section>s`.
    """
    ans = []
    for section in body:
        t, a, b = section
        secid = a['id']
        # pylint: disable=W0640
        counter = [0]
        endnotes = []

        def split_footnote(e):
            t, a, b = e
            if t != '.footnote':
                return e
            counter[0] += 1
            ordinal = [str(counter[0])]
            fid = ('%s-fn%d' % (secid, counter[0])
                   if 'id' not in a else a['id'])
            endnotes.append(
                mkel('aside', merge_attrs(a, aside_attrs, {'id': fid}), b))
            return mkel('a', merge_attrs(a_attrs, {'href': '#' + fid}),
                        ordinal)

        b = _transform(split_footnote, b)
        if len(endnotes) > 0:
            b.append(mkel('section', section_attrs, endnotes))
        ans.append((t, a, b))
    return ans
def endnotify(body, aside_attrs, a_attrs, section_attrs):
    """Transform .footnotes to noterefs and chapter rearnotes.

    Assumes that `body` is a list of `<section>s`.
    """
    ans = []
    for section in body:
        t, a, b = section
        secid = a['id']
        # pylint: disable=W0640
        counter = [0]
        endnotes = []

        def split_footnote(e):
            t, a, b = e
            if t != '.footnote':
                return e
            counter[0] += 1
            ordinal = [str(counter[0])]
            fid = ('%s-fn%d' % (secid, counter[0]) if 'id' not in a
                   else a['id'])
            endnotes.append(mkel('aside',
                                 merge_attrs(a, aside_attrs, {'id': fid}),
                                 b))
            return mkel('a',
                        merge_attrs(a_attrs, {'href': '#' + fid}),
                        ordinal)

        b = _transform(split_footnote, b)
        if len(endnotes) > 0:
            b.append(mkel('section', section_attrs, endnotes))
        ans.append((t, a, b))
    return ans
def _coalesce_siblings(tag, attrs, sibling_group):
    """Yield the tidied sibling bodies, unwrapping plain spans."""
    compacted_content = tidy(_sib_bodies(sibling_group))
    if (tag, attrs) == ('span', {}):
        # an attribute-less span adds nothing -- emit its children directly
        for compacted_bit in compacted_content:
            yield compacted_bit
    else:
        yield mkel(tag, attrs, compacted_content)
def transclude(self, pic):
    """Resolve an embedded picture element into a figure, if possible."""
    # for id:
    # pylint: disable=W0622
    if self.transclusions is None:
        return []
    width_emu = float(val(pic, ns.wp('extent'), 'cx'))
    embeds = pic.xpath('.//a:blip/@r:embed', namespaces=ns.dict)
    try:
        id, = embeds
    except ValueError:
        log.warn('Expected exactly one r:embed with an image id, got %r',
                 embeds)
        return []
    href = self.transclusions.normalize_known_transclusion(id)
    return [make_figure(
        relwidth=width_emu / self.textwidth_emu,
        inline={'anchor': False,
                'inline': True}[pic.tag.split('}')[1]],
        body=[mkel('img', {'src': href}, [])],
        src=href,
        original_href=id)]
def _coalesce_siblings(tag, attrs, sibling_group):
    """Compact a run of sibling elements; bare spans are flattened away."""
    compacted_content = tidy(_sib_bodies(sibling_group))
    if (tag, attrs) != ('span', {}):
        yield mkel(tag, attrs, compacted_content)
    else:
        for compacted_bit in compacted_content:
            yield compacted_bit
def make_toc(title, lang, toc, toc_depth, titlepage=False):
    """Produce the epub table-of-contents xhtml document."""
    ns = {
        None: 'http://www.w3.org/1999/xhtml',
        'epub': 'http://www.idpf.org/2007/ops',
    }
    toc_ol_body = []
    if titlepage:
        toc_ol_body.append(
            mkel('li', {'id': 'toc-titlepage'},
                 [('a', {'href': 'titlepage.xhtml'}, [title])]))
    # FIXME(alexander): make this work for arbitrary toc-depth;
    # also don't tie to single-html file layout/name.
    assert toc_depth == 1
    chapter_toc = [h for h in toc if isinstance(h, tuple)]
    toc_ol_body.extend(
        ('li',
         {'class': 'toc-chapter', 'id': 'toc-chapter-%d' % i},
         [('a', {'href': 'main.xhtml#%s' % a['id']}, [h])])
        for (i, (tag, a, (h,))) in zip(count(1), chapter_toc))
    landmarks = make_landmarks(title, lang)
    body = ('body', {},
            [('section',
              {'class': 'frontmatter toc',
               'epub:type': 'frontmatter toc'},
              [('header', {}, [('h1', {}, [lang.localize('Contents')])]),
               ('nav', {'epub:type': 'toc', 'id': 'toc'},
                [('ol', {}, toc_ol_body)]),
               landmarks])])
    return html_string_from_body(body, title=title, nsmap=ns)
def make_footnote(self, e):
    """Build a '.footnote' element from a footnote/endnote reference."""
    # pylint: disable=W0622
    id = e.attrib[ns.w('id')]
    ps = (self.doc.get_footnote
          if e.tag == FOOTNOTE_REFERENCE_TAG
          else self.doc.get_endnote)(id).iterfind(P_TAG)
    footnote_part = 'footnotes'  # XXX what about endnotes
    return mkel('.footnote', {},
                [self.handle_p(p, current_part=footnote_part)
                 for p in ps])
def handle_p(self, e, current_part, in_list=False):
    """Convert one <w:p> element, handling alignment, style and indent."""
    attrs = {}
    pPr = first_of_tag(e, P_PROPS_TAG)
    jc_class = self.JC_TO_CLASS.get(val(pPr, ns.w('jc')))
    if jc_class:
        attrs = add_class(attrs, jc_class)
    tag = style_to_tag(val(pPr, ns.w('pStyle')) or '')
    content = iter(e) if pPr is None else pPr.itersiblings()
    handle_p = partial(self.handle_p_content, current_part=current_part)
    ans = mkel(tag, attrs, flatmap(handle_p, content))
    left_indent = val(pPr, ns.w('ind'), ns.w('left')) or 0.0
    indent = int(round(float(left_indent) / self.default_indent_twips))
    if (not in_list) and indent:
        ans = lift_code(ans)
        ans = mkel('.block', {'indent': indent}, [ans])
        ans = hacky_flatten_block(ans)
    return ans
def handle_p(self, e, current_part, in_list=False):
    """Translate a docx paragraph into an element tuple."""
    attrs = {}
    pPr = first_of_tag(e, P_PROPS_TAG)
    jc_class = self.JC_TO_CLASS.get(val(pPr, ns.w('jc')))
    if jc_class:
        attrs = add_class(attrs, jc_class)
    tag = style_to_tag(val(pPr, ns.w('pStyle')) or '')
    content = iter(e) if pPr is None else pPr.itersiblings()
    handle_p = partial(self.handle_p_content,
                       current_part=current_part)
    ans = mkel(tag, attrs, flatmap(handle_p, content))
    left_indent = val(pPr, ns.w('ind'), ns.w('left')) or 0.0
    indent = int(round(float(left_indent) /
                       self.default_indent_twips))
    if (not in_list) and indent:
        ans = lift_code(ans)
        ans = mkel('.block', {'indent': indent}, [ans])
        ans = hacky_flatten_block(ans)
    return ans
def make_footnote(self, e):
    """Expand a (foot|end)note reference into a '.footnote' element."""
    # pylint: disable=W0622
    id = e.attrib[ns.w('id')]
    getter = (self.doc.get_footnote if e.tag == FOOTNOTE_REFERENCE_TAG
              else self.doc.get_endnote)
    ps = getter(id).iterfind(P_TAG)
    footnote_part = 'footnotes'  # XXX what about endnotes
    paragraphs = [self.handle_p(p, current_part=footnote_part) for p in ps]
    return mkel('.footnote', {}, paragraphs)
def meta_to_html(meta):
    """Render document properties as (lang code, title, html elements)."""
    # pylint: disable=R0914
    # FIXME(alexander): this is just a really hacky way to convert the
    # document properties into something vaguely visually plausible in a
    # style independent manner
    head = meta.items()
    lang = head['lang']
    prepend = []
    title = head.pop('title', '')
    # FIXME(alexander): should maybe default to docname?
    # the only style which does currently not have a title
    # is letter, so could use subject there
    if title:
        prepend.append(mkel('h1', {'class': ['title']}, [title]))
    subtitle = head.pop('subtitle', '')
    if subtitle:
        prepend.append(mkel('h2', {'class': ['subtitle']}, [subtitle]))
    dl_body = []
    types_to_omit = (int, Bibliography, Image, Lang)
    # FIXME(alexander): toc-depth should be int,
    # and bibliography-preamble must die
    keys_to_omit = ('toc-depth', 'bibliography-preamble')
    for (k, v) in head.iteritems():
        is_default_value = 'supplied' not in meta.d[k]
        if is_default_value:
            continue
        a = {'class': [k]}
        if isinstance(v, types_to_omit) or k in keys_to_omit:
            a['hidden'] = ""
        label = meta.d[k].get('label', k.capitalize())
        dl_body.append(mkel('dt', a, [lang.localize(label)]))
        a = a.copy()
        dd = unparse_literal(v, roundtrip=False)
        roundtrippable = unparse_literal(v)
        if dd != roundtrippable:
            a['data-value'] = roundtrippable
        dl_body.append(mkel('dd', a,
                            [dd if not isinstance(v, bool)
                             else lang.localize(dd)]))
    if dl_body:
        prepend.append(mkel('dl', {'id': 'document-properties'}, dl_body))
    return lang.code, title, prepend
def _opf_item(href, id=None, mime=None, properties=None):
    # pylint: disable=W0622
    """Build a manifest <item> element for the OPF package document."""
    if not id:
        id = href.split('.')[0].replace('/', '-')
    attrs = {
        'id': id,
        'href': href,
        'media-type': mime or mimetype_of_url(href),
    }
    if properties is not None:
        attrs['properties'] = properties
    return mkel('item', attrs, [])
def meta_to_html(meta):  # pylint: disable=R0914
    """Render document metadata as ``(lang_code, title, html_elements)``.

    Title/subtitle become h1/h2 headings; every remaining explicitly
    supplied property becomes a dt/dd pair in a 'document-properties' dl.
    """
    # FIXME(alexander): this is just a really hacky way to convert the
    # document properties into something vaguely visually plausible in a style
    # independent manner
    # NOTE(review): meta.items() apparently returns an ordered dict-like
    # view (it supports [], pop and iteritems below) — confirm its type
    head = meta.items()
    lang = head['lang']
    prepend = []
    title = head.pop('title', '')
    # FIXME(alexander): should maybe default to docname?
    # the only style which does currently not have a title
    # is letter, so could use subject there
    if title:
        prepend.append(mkel('h1', {'class': ['title']}, [title]))
    subtitle = head.pop('subtitle', '')
    if subtitle:
        prepend.append(mkel('h2', {'class': ['subtitle']}, [subtitle]))
    dl_body = []
    # properties of these types/keys are kept in the dl, but hidden
    types_to_omit = (int, Bibliography, Image, Lang)
    #FIXME(alexander): toc-depth should be int,
    # and bibliography-preamble must die
    keys_to_omit = ('toc-depth', 'bibliography-preamble')
    for (k, v) in head.iteritems():
        # skip properties the user did not explicitly supply
        is_default_value = 'supplied' not in meta.d[k]
        if is_default_value:
            continue
        a = {'class': [k]}
        if isinstance(v, types_to_omit) or k in keys_to_omit:
            a['hidden'] = ""
        label = meta.d[k].get('label', k.capitalize())
        dl_body.append(mkel('dt', a, [lang.localize(label)]))
        a = a.copy()
        # when the display form differs from the roundtrippable literal,
        # preserve the latter in a data-value attribute
        dd = unparse_literal(v, roundtrip=False)
        roundtrippable = unparse_literal(v)
        if dd != roundtrippable:
            a['data-value'] = roundtrippable
        dl_body.append(mkel('dd', a,
                            [dd if not isinstance(v, bool)
                             else lang.localize(dd)]))
    if dl_body:
        prepend.append(mkel('dl', {'id': 'document-properties'}, dl_body))
    return lang.code, title, prepend
def maybe_anchorize_id(tag, attrs, body):
    """DESTRUCTIVELY push the id into an anchor in the body, in most cases.

    Anything w/ an id should be linkable; the id should not be used otherwise.
    """
    if 'id' not in attrs:
        return
    anchor_name = attrs.pop('id')
    # list containers and asides don't get an inline anchor child
    if tag not in ('dl', 'ol', 'ul', 'aside'):
        body.insert(0, mkel('a', {'name': anchor_name}, []))
def handle_run(self, r):
    """Convert a docx run (<w:r>) into a list of inline content nodes.

    Text, tabs, breaks, hyphens, drawings and note references are mapped
    to internal nodes; any run properties (w:rPr) are then applied to the
    result via apply_rpr, except when the run starts with a footnote.
    """
    # XXX(ash): pylint is right about this being too complex
    # pylint: disable=R0912
    _ = Var('_')
    ans = []
    rPr = first_of_tag(r, RUN_PROPS_TAG)
    # when run properties exist they are the first child; iterate whatever
    # follows them, otherwise iterate all children
    content = rPr.itersiblings() if rPr is not None else iter(r)
    for e in content:
        # pylint: disable=W0622
        type = e.attrib.get(ns.w('type'))
        if e.tag == TEXT_TAG:
            ans.append(e.text)
        elif e.tag == TAB_TAG:
            # XXX(alexander): this can also work like a '_' or '…' \dotfill
            ans.append('\t')
        elif e.tag in (FOOTNOTE_REF_TAG, ENDNOTE_REF_TAG):
            # XXX(ash): what is going on here
            pass
        elif e.tag == BREAK_TAG and type in ('page', 'column'):
            ans.append(mkel('.pagebreak', {}, []))
        elif e.tag == BREAK_TAG or e.tag == CR_TAG:
            assert (type is None) or (type == 'textWrapping')
            ans.append(mkel('br', {}, []))
        # FIXME, tags below untested
        elif e.tag == SOFT_HYPHEN_TAG:
            ans.append(SOFT_HYPHEN)
        elif e.tag == NON_BREAKING_HYPHEN_TAG:
            ans.append(NON_BREAKING_HYPHEN)
        elif e.tag == ns.w('drawing'):
            # transclude every embedded image found under the drawing
            ans.extend(
                flatmap(self.transclude,
                        e.xpath(self.IMAGE_XPATH, namespaces=ns.dict)))
        elif e.tag in (FOOTNOTE_REFERENCE_TAG, ENDNOTE_REFERENCE_TAG):
            ans.append(self.make_footnote(e))
        else:
            # movie,
            # rt, ruby, rubyAlign etc. for ruby stuff
            # sym, with special handling for wingdings I guess...
            log.warn('Unknown tag %r', e.tag)
    # apply run styling unless the result starts with a '.footnote' node
    # (Seq pattern match); presumably to keep run styling off footnotes
    if rPr is not None and ans != Seq[Seq['.footnote', _:], _:]:
        ans = self.apply_rpr(rPr, ans)
    return ans
def maybe_anchorize_id(tag, attrs, body):
    """DESTRUCTIVELY push the id into an anchor in the body, in most cases.

    Anything w/ an id should be linkable; the id should not be used otherwise.
    """
    if 'id' not in attrs:
        return
    anchor_name = attrs.pop('id')
    # list containers and asides don't get an inline anchor child
    if tag not in ('dl', 'ol', 'ul', 'aside'):
        body.insert(0, mkel('a', {'name': anchor_name}, []))
def handle_run(self, r):
    """Convert a docx run (<w:r>) into a list of inline content nodes.

    Text, tabs, breaks, hyphens, drawings and note references are mapped
    to internal nodes; any run properties (w:rPr) are then applied to the
    result via apply_rpr, except when the run starts with a footnote.
    """
    # XXX(ash): pylint is right about this being too complex
    # pylint: disable=R0912
    _ = Var('_')
    ans = []
    rPr = first_of_tag(r, RUN_PROPS_TAG)
    # when run properties exist they are the first child; iterate whatever
    # follows them, otherwise iterate all children
    content = rPr.itersiblings() if rPr is not None else iter(r)
    for e in content:
        # pylint: disable=W0622
        type = e.attrib.get(ns.w('type'))
        if e.tag == TEXT_TAG:
            ans.append(e.text)
        elif e.tag == TAB_TAG:
            # XXX(alexander): this can also work like a '_' or '…' \dotfill
            ans.append('\t')
        elif e.tag in (FOOTNOTE_REF_TAG, ENDNOTE_REF_TAG):
            # XXX(ash): what is going on here
            pass
        elif e.tag == BREAK_TAG and type in ('page', 'column'):
            ans.append(mkel('.pagebreak', {}, []))
        elif e.tag == BREAK_TAG or e.tag == CR_TAG:
            assert (type is None) or (type == 'textWrapping')
            ans.append(mkel('br', {}, []))
        # FIXME, tags below untested
        elif e.tag == SOFT_HYPHEN_TAG:
            ans.append(SOFT_HYPHEN)
        elif e.tag == NON_BREAKING_HYPHEN_TAG:
            ans.append(NON_BREAKING_HYPHEN)
        elif e.tag == ns.w('drawing'):
            # transclude every embedded image found under the drawing
            ans.extend(
                flatmap(self.transclude,
                        e.xpath(self.IMAGE_XPATH, namespaces=ns.dict)))
        elif e.tag in (FOOTNOTE_REFERENCE_TAG, ENDNOTE_REFERENCE_TAG):
            ans.append(self.make_footnote(e))
        else:
            # movie,
            # rt, ruby, rubyAlign etc. for ruby stuff
            # sym, with special handling for wingdings I guess...
            log.warn('Unknown tag %r', e.tag)
    # apply run styling unless the result starts with a '.footnote' node
    # (Seq pattern match); presumably to keep run styling off footnotes
    if rPr is not None and ans != Seq[Seq['.footnote', _:], _:]:
        ans = self.apply_rpr(rPr, ans)
    return ans
def unwrap_figures(body):
    """Yield `body` with block-level figures hoisted out of wrapping <p>s."""
    # XXX: this currently only operates at the toplevel, both looking for
    # paragraphs and also looking for block figures in paragraphs. Strictly
    # speaking we should probably descend for both. As an additional hack, we
    # descend, up to the the <td> level, into tables.
    FATTRS, PATTRS, FBODY = map(
        Var, 'FATTRS, PATTRS, FBODY'.split(', '))
    # matches an attrs dict whose style marks the element display:block
    BLOCK_STYLE_ATTR = Var('BLOCK_STYLE_ATTR',
                           lambda a: a['style']['display'] == 'block')
    BLOCK_FIG = ('figure', BLOCK_STYLE_ATTR, FBODY)
    # matches a paragraph body (list) containing a block figure
    PBODY_WITH_BLOCKFIG = Var('PBODY_WITH_BLOCKFIG', list.__contains__,
                              BLOCK_FIG)
    for elem in body:
        if elem and elem[0] in ('table', 'tr', 'td', 'blockquote'):
            # recurse into table-ish containers (children are elem[-1])
            yield mkel(elem[0], elem[1], list(unwrap_figures(elem[-1])))
        elif elem in (('p', {}, [('figure', FATTRS, FBODY)]),
                      ('figure', FATTRS, FBODY)):
            # override style of standalone figures
            new_fattrs = copy.deepcopy(FATTRS.val)
            new_fattrs['style']['display'] = 'block'
            yield mkel('figure', new_fattrs, FBODY.val)
        # Split a <p> that contains a block figure into
        # two paragraphs separated by a figure.
        # This case can only arise due to the
        # large inline image heuristic; if the paragraph
        # has an id attribute (shouldn't happen yet),
        # we put it into the first half of the split. We throw away
        # empty <p>s.
        elif elem == ('p', PATTRS, Seq[PBODY_WITH_BLOCKFIG:]):
            # NOTE(review): rebinds the `body` parameter to the matched
            # paragraph body; the outer iterator is unaffected
            body = PBODY_WITH_BLOCKFIG.val
            i_fig = body.index(BLOCK_FIG)
            if body[:i_fig]:
                yield mkel('p', PATTRS.val, body[:i_fig])
                # the id (if any) stays with the first half of the split
                cloned_attrs = dict((k, v) for (k, v) in PATTRS.val.items()
                                    if k != 'id')
            else:
                cloned_attrs = PATTRS.val
            yield body[i_fig]
            if cloned_attrs or body[i_fig+1:]:
                yield ('p', cloned_attrs, body[i_fig+1:])
        else:
            yield elem
def build_list(cls, tree):
    """Recursively build list elements from a flat item/sub-list tree.

    `tree` mixes ((tag, attr), item) entries with nested plain lists;
    consecutive entries sharing a (tag, attr) key become one list element
    with <li> children.  A nested list is spliced into the preceding <li>,
    or wrapped in a '.block' when its group starts with it.
    """
    _ = Var('_')
    if isinstance(tree, list):
        ans = []
        # NOTE(review): sub-lists get the wildcard key (_, _); this
        # appears to rely on Var wildcard equality so that a sub-list
        # following items is merged into the items' group — confirm
        for (tag, attr), body in itertools.groupby(
                tree, lambda x: (_, _) if isinstance(x, list) else x[0]):
            this_body = []
            if tag is _:
                # group started with a sub-list; assumes the group holds
                # exactly one (the unpack raises otherwise)
                body, = body
                ans.append(mkel('.block', {}, cls.build_list(body)))
            else:
                for x in body:
                    if isinstance(x, list):
                        # nested list: splice its items into the last <li>
                        item = cls.build_list(x)
                        this_body[-1][2].extend(item)
                    else:
                        item = [x[1]]
                        this_body.append(mkel('li', {}, item))
                ans.append(mkel(tag, attr, this_body))
        return ans
def hacky_flatten_block(block):
    """Collapse a '.block' wrapping a single <p> into one flat '.block'.

    The paragraph's attributes are merged into the block's and its body is
    hoisted up; any other shape is returned unchanged.
    """
    # XXX(ash): move to postprocess
    # pylint: disable=C0103
    BLOCK_ATTRS = Var('BLOCK_ATTRS')
    P_ATTRS = Var('P_ATTRS')
    BODY = Var('BODY')
    if block == ('.block', BLOCK_ATTRS, [('p', P_ATTRS, BODY)]):
        return mkel('.block', merge_attrs(BLOCK_ATTRS.val, P_ATTRS.val),
                    BODY.val)
    else:
        return block
def unwrap_figures(body):
    """Yield `body` with block-level figures hoisted out of wrapping <p>s."""
    # XXX: this currently only operates at the toplevel, both looking for
    # paragraphs and also looking for block figures in paragraphs. Strictly
    # speaking we should probably descend for both. As an additional hack, we
    # descend, up to the the <td> level, into tables.
    FATTRS, PATTRS, FBODY = map(Var, 'FATTRS, PATTRS, FBODY'.split(', '))
    # matches an attrs dict whose style marks the element display:block
    BLOCK_STYLE_ATTR = Var('BLOCK_STYLE_ATTR',
                           lambda a: a['style']['display'] == 'block')
    BLOCK_FIG = ('figure', BLOCK_STYLE_ATTR, FBODY)
    # matches a paragraph body (list) containing a block figure
    PBODY_WITH_BLOCKFIG = Var('PBODY_WITH_BLOCKFIG', list.__contains__,
                              BLOCK_FIG)
    for elem in body:
        if elem and elem[0] in ('table', 'tr', 'td', 'blockquote'):
            # recurse into table-ish containers (children are elem[-1])
            yield mkel(elem[0], elem[1], list(unwrap_figures(elem[-1])))
        elif elem in (('p', {}, [('figure', FATTRS, FBODY)]),
                      ('figure', FATTRS, FBODY)):
            # override style of standalone figures
            new_fattrs = copy.deepcopy(FATTRS.val)
            new_fattrs['style']['display'] = 'block'
            yield mkel('figure', new_fattrs, FBODY.val)
        # Split a <p> that contains a block figure into
        # two paragraphs separated by a figure.
        # This case can only arise due to the
        # large inline image heuristic; if the paragraph
        # has an id attribute (shouldn't happen yet),
        # we put it into the first half of the split. We throw away
        # empty <p>s.
        elif elem == ('p', PATTRS, Seq[PBODY_WITH_BLOCKFIG:]):
            # NOTE(review): rebinds the `body` parameter to the matched
            # paragraph body; the outer iterator is unaffected
            body = PBODY_WITH_BLOCKFIG.val
            i_fig = body.index(BLOCK_FIG)
            if body[:i_fig]:
                yield mkel('p', PATTRS.val, body[:i_fig])
                # the id (if any) stays with the first half of the split
                cloned_attrs = dict(
                    (k, v) for (k, v) in PATTRS.val.items() if k != 'id')
            else:
                cloned_attrs = PATTRS.val
            yield body[i_fig]
            if cloned_attrs or body[i_fig + 1:]:
                yield ('p', cloned_attrs, body[i_fig + 1:])
        else:
            yield elem
def hacky_flatten_block(block):
    """Collapse a '.block' wrapping a single <p> into one flat '.block'.

    The paragraph's attributes are merged into the block's and its body is
    hoisted up; any other shape is returned unchanged.
    """
    # XXX(ash): move to postprocess
    # pylint: disable=C0103
    BLOCK_ATTRS = Var('BLOCK_ATTRS')
    P_ATTRS = Var('P_ATTRS')
    BODY = Var('BODY')
    if block == ('.block', BLOCK_ATTRS, [('p', P_ATTRS, BODY)]):
        return mkel('.block', merge_attrs(BLOCK_ATTRS.val, P_ATTRS.val),
                    BODY.val)
    else:
        return block
def build_list(cls, tree):
    """Recursively build list elements from a flat item/sub-list tree.

    `tree` mixes ((tag, attr), item) entries with nested plain lists;
    consecutive entries sharing a (tag, attr) key become one list element
    with <li> children.  A nested list is spliced into the preceding <li>,
    or wrapped in a '.block' when its group starts with it.
    """
    _ = Var('_')
    if isinstance(tree, list):
        ans = []
        # NOTE(review): sub-lists get the wildcard key (_, _); this
        # appears to rely on Var wildcard equality so that a sub-list
        # following items is merged into the items' group — confirm
        for (tag, attr), body in itertools.groupby(
                tree, lambda x: (_, _) if isinstance(x, list) else x[0]):
            this_body = []
            if tag is _:
                # group started with a sub-list; assumes the group holds
                # exactly one (the unpack raises otherwise)
                body, = body
                ans.append(mkel('.block', {}, cls.build_list(body)))
            else:
                for x in body:
                    if isinstance(x, list):
                        # nested list: splice its items into the last <li>
                        item = cls.build_list(x)
                        this_body[-1][2].extend(item)
                    else:
                        item = [x[1]]
                        this_body.append(mkel('li', {}, item))
                ans.append(mkel(tag, attr, this_body))
        return ans
def apply_html_style(tag, run):
    '''Fold the docx run-property for inline html *tag* into *run*.

    >>> run1 = mkel('w:r', {}, ['...'])
    >>> run2 = apply_html_style('b', run1)
    >>> run2 # doctest: +NORMALIZE_WHITESPACE
    ('w:r', {}, [('w:rPr', {}, [('w:b', {'w:val': '1'}, [])]), '...'])
    >>> apply_html_style('i', run2) # doctest: +NORMALIZE_WHITESPACE
    ('w:r', {}, [('w:rPr', {}, [('w:b', {'w:val': '1'}, []),
                                ('w:i', {'w:val': '1'}, [])]), '...'])
    '''
    # map of supported inline tags to their w:rPr child element
    prop_name, prop_val = {
        'u': ('w:u', 'single'),
        'b': ('w:b', '1'),
        's': ('w:strike', '1'),
        'i': ('w:i', '1'),
    }[tag]
    new_prop = mkel(prop_name, {'w:val': prop_val}, [])
    run_tag, run_attrs, run_body = run
    if run_body[0][:1] == ('w:rPr',):
        # run already has properties: append ours to the existing list
        assert run_body[0][1] == {}
        props = run_body[0][2] + [new_prop]
        rest = run_body[1:]
    else:
        props = [new_prop]
        rest = run_body
    return mkel(run_tag, run_attrs, [mkel('w:rPr', {}, props)] + rest)
def apply_html_style(tag, run):
    '''Fold the docx run-property for inline html *tag* into *run*.

    >>> run1 = mkel('w:r', {}, ['...'])
    >>> run2 = apply_html_style('b', run1)
    >>> run2 # doctest: +NORMALIZE_WHITESPACE
    ('w:r', {}, [('w:rPr', {}, [('w:b', {'w:val': '1'}, [])]), '...'])
    >>> apply_html_style('i', run2) # doctest: +NORMALIZE_WHITESPACE
    ('w:r', {}, [('w:rPr', {}, [('w:b', {'w:val': '1'}, []),
                                ('w:i', {'w:val': '1'}, [])]), '...'])
    '''
    # map of supported inline tags to their w:rPr child element
    prop_name, prop_val = {
        'u': ('w:u', 'single'),
        'b': ('w:b', '1'),
        's': ('w:strike', '1'),
        'i': ('w:i', '1'),
    }[tag]
    new_prop = mkel(prop_name, {'w:val': prop_val}, [])
    run_tag, run_attrs, run_body = run
    if run_body[0][:1] == ('w:rPr',):
        # run already has properties: append ours to the existing list
        assert run_body[0][1] == {}
        props = run_body[0][2] + [new_prop]
        rest = run_body[1:]
    else:
        props = [new_prop]
        rest = run_body
    return mkel(run_tag, run_attrs, [mkel('w:rPr', {}, props)] + rest)
def make_opf(head, parts, transclusions, includes,  # pylint: disable=R0913,R0914
             cover_image=None, compat=False):
    """Create package.opf contents.

    Returns ``(package_element, namespace_map)`` with a Dublin Core
    metadata block, a manifest covering includes, toc, cover, content
    parts and transcluded images, and a matching spine.

    `compat`: whether to create an epub2 compatible package

    FIXME(alexander): not fully implemented
    """
    title = head['title']
    dublin, dublin_ns = meta_to_dublin_core(head)
    manifest_body = []
    spine_body = []
    manifest_body.extend(_opf_item(inc) for inc in includes)
    if compat:
        # epub2 readers need the legacy NCX table of contents
        manifest_body.append(_opf_item('toc.ncx', 'ncx'))
    manifest_body.append(_opf_item('toc.xhtml', properties='nav'))
    spine_body.append(mkel('itemref', {'idref': 'toc', 'linear': 'no'}, []))
    if cover_image:
        cover_src = transclusions.add_literal_image(cover_image)
        manifest_body.extend(make_cover_opf(cover_image, cover_src))
        spine_body.append(
            mkel('itemref', {'idref': 'cover', 'linear': 'no'}, []))
    else:
        cover_src = None
    for part in parts:
        manifest_body.append(_opf_item(part + '.xhtml'))
        spine_body.append(
            mkel('itemref', {'idref': part, 'linear': 'yes'}, []))
    # images (cover already handled above, so skip it here)
    manifest_body.extend(
        _opf_item(k, id='img-' + k.split('.')[0],
                  mime=transclusions.get_mimetype(k))
        for (k, _) in transclusions.iteritems()
        if k != cover_src)
    package_body = [
        dublin,
        mkel('manifest', {}, manifest_body),
        mkel('spine', {'toc': 'ncx'} if compat else {}, spine_body)]
    if compat:
        # epub2 legacy <guide> pointing at the toc
        package_body.append(('guide', {}, [
            ('reference', dict(type='toc', title=title, href='toc.xhtml'),
             [])]))
    package = mkel('package',
                   {'version': '3.0', 'unique-identifier': 'uuid'},
                   package_body)
    # NOTE(review): local `ns` shadows any module-level `ns` helper here;
    # the 'opf' prefix becomes the default (None) namespace
    ns = {(k if k != 'opf' else None): v for (k, v) in dublin_ns.iteritems()}
    return package, ns
def whack_elt(pred, body, kill_body=False):
    """Recursively remove elements matching `pred` from `body`.

    A matching element is replaced by its (recursively filtered) children,
    or dropped entirely when `kill_body` is true.  Strings pass through
    untouched; non-matching elements are rebuilt with filtered children.
    """
    out = []
    for node in body:
        if isinstance(node, basestring):
            out.append(node)
            continue
        tag, attrs, children = node
        if not pred(node):
            out.append(mkel(tag, attrs, whack_elt(pred, children, kill_body)))
        elif not kill_body:
            out.extend(whack_elt(pred, children, kill_body))
    return out
def bad_command(self, head, attrs, body):
    """Render an unrecognized underline-command as a visible conversion error.

    Registers a docproblem for the bad command and returns latex showing a
    small red warning around the offending (underlined) command name,
    followed by the latexified body.
    """
    assert head in ('LIT', 'CMD')
    bad_cmd = attrs['class'][0]
    n = docproblem('Unknown command: {}', bad_cmd)
    warning = small(red(self.latexify(
        u"CONVERSION ERROR: Not a valid command"
        u" (only use underlining for commands): “")))
    # commands-with-args are displayed with their trailing colon
    the_cmd = self.latexify(
        mkel('u', {}, [bad_cmd + (':' if head == 'CMD' else '')]))
    warning_end = small(red(self.latexify(u'”')))
    return join(problem_anchor(n, join(warning, the_cmd, warning_end)),
                self.latexify(body))
def whack_elt(pred, body, kill_body=False):
    """Recursively remove elements matching `pred` from `body`.

    A matching element is replaced by its (recursively filtered) children,
    or dropped entirely when `kill_body` is true.  Strings pass through
    untouched; non-matching elements are rebuilt with filtered children.
    """
    out = []
    for node in body:
        if isinstance(node, basestring):
            out.append(node)
            continue
        tag, attrs, children = node
        if not pred(node):
            out.append(mkel(tag, attrs, whack_elt(pred, children, kill_body)))
        elif not kill_body:
            out.extend(whack_elt(pred, children, kill_body))
    return out
def underlines_to_commands(parsed_body, lstrip=False):
    """Reinterpret <u>…</u> runs in `parsed_body` as document commands.

    Underlined text is the GDocs-facing command syntax: 'name:' consumes
    the rest of the body as arguments, '$…$' and '\\…' become tex
    commands, '<…>' is parsed as literal xml, whitespace-only underlines
    collapse to plain whitespace, and anything else becomes a literal
    command.  When `lstrip` is true, leading whitespace is stripped from
    the first string content (used after a command-with-args).
    """
    # pylint: disable=R0912
    CATTRS, CBODY = map(
        Var, 'CATTRS, CBODY'.split(', '))
    reparsed = []
    appendpoint = reparsed
    for i, e in enumerate(parsed_body):
        if e == ('u', CATTRS, CBODY):
            assert CATTRS.val == {}
            assert len(CBODY.val) == 1
            # underlines can hide invisible whitespace
            # FIXME(alexander): should make sure this is plain text
            # bogus underlined footnoterefs can e.g. mess this up
            cmd = CBODY.val[0].strip()
            if cmd.endswith(':'):
                # take args
                reparsed.append(
                    mkcmd(cmd[:-1].lower(),
                          underlines_to_commands(parsed_body[i+1:], True)))
                return reparsed
            elif cmd[:1] == cmd[-1:] == '$':
                # inline tex math: $…$ -> \(…\)
                reparsed.append(mkcmd('tex', [r'\(%s\)' % cmd[1:-1]]))
            elif cmd[:1] == '\\':
                # FIXME(alexander): this should probably be parsed
                reparsed.append(mkcmd('tex', [cmd]))
            elif cmd[:1] == '<' and cmd[-1] == '>':
                # FIXME(alexander):
                reparsed.extend(tidy(parse_chunk(cmd)))
                # NOTE(review): mkerr's return value is discarded here —
                # confirm it registers the problem as a side effect
                mkerr([cmd], 'Underlined tags must be well-formed xml')
            # Transform (invisibly, in GDocs) underlined whitespace to plain
            # whitespace. This should not break up underlined runs of text,
            # because at this point we should already have coalesced those.
            elif not cmd:
                if CATTRS.val:
                    log.warn('Ignoring bogus attributes in `<u> </u>`: %r',
                             CATTRS.val)
                cbody, = CBODY.val
                reparsed.append(cbody)
            else:
                assert cmd
                reparsed.append(mklit(cmd.lower()))
            lstrip = False
        else:
            if isinstance(e, basestring):
                if lstrip:
                    e = e.lstrip()
                if e:
                    appendpoint.append(e)
            else:
                # descend into non-underline elements, propagating lstrip
                appendpoint.append(
                    mkel(e[0], e[1], underlines_to_commands(e[2], lstrip)))
            lstrip = False
    return reparsed