def meta_to_runs(what, intern_image, total_w): # pylint: disable=R0911
    """Convert a meta value into a flat list of ``w:r`` run elements.

    Dispatches on the type of ``what``:

    - string: a single run wrapping ``mk_t(what)``
    - list: the concatenated runs of every item
    - tuple ``(tag, attrs, body)``: the runs of ``body``, styled via
      ``apply_html_style`` for the tags ``b``, ``i``, ``s`` and ``u``;
      other tags are logged and their runs are returned unstyled
    - ``literal.Image``: a single run holding the picture, scaled so that
      its width is the percentage in ``what.style['width']`` of ``total_w``
    - ``literal.Bibliography``: the runs of its ``data``
    - anything else: logged, then the runs of ``unparse_literal(what)``

    ``intern_image`` is called with the image and its result is used as the
    relationship id passed to ``make_pic``.
    """
    recurse = partial(meta_to_runs, intern_image=intern_image, total_w=total_w)
    if isinstance(what, basestring):
        return [mkel('w:r', {}, [mk_t(what)])]
    elif isinstance(what, list):
        return flatmap(recurse, what)
    elif isinstance(what, tuple):
        t, _, b = what
        runs = recurse(b)
        if t in ('b', 'i', 's', 'u'):
            return [apply_html_style(t, run) for run in runs]
        else:
            log.warn("Didn't understand html tag %r", what)
            return runs
    elif isinstance(what, literal.Image):
        rid = intern_image(what)
        target_w = parse_percentage(what.style['width']) * total_w
        orig_w, orig_h = what.get_size()
        # Scale both dimensions by target_w / orig_w so that the resulting
        # width equals target_w and the aspect ratio is kept (dividing by the
        # height instead would make the *height* come out as target_w).
        w, h = [docxlite.Emu(x * target_w / orig_w) for x in (orig_w, orig_h)]
        inline = (what.style['display'] == 'inline')
        return [mkel('w:r', {}, [make_pic(rid, w, h, inline)])]
    elif isinstance(what, literal.Bibliography):
        return recurse(what.data)
    else:
        log.warn('Fallthrough: %r', what)
        return recurse(unparse_literal(what))
def meta_to_html(meta): # pylint: disable=R0914
    """Render the document properties in ``meta`` as leading html elements.

    Returns the tuple ``(lang.code, title, prepend)`` where ``prepend`` is a
    list holding, in order and only when non-empty: an ``h1`` for the title,
    an ``h2`` for the subtitle and a ``dl`` (id ``document-properties``) with
    one ``dt``/``dd`` pair per remaining field that has a ``'supplied'``
    entry in ``meta.d``.
    """
    # FIXME(alexander): this is just a really hacky way to convert the
    # document properties into something vaguely visually plausible in a style
    # independent manner
    head = meta.items()
    lang = head['lang']
    prepend = []

    # FIXME(alexander): should maybe default to docname?
    # the only style which does currently not have a title
    # is letter, so could use subject there
    title = head.pop('title', '')
    if title:
        prepend.append(mkel('h1', {'class': ['title']}, [title]))

    subtitle = head.pop('subtitle', '')
    if subtitle:
        prepend.append(mkel('h2', {'class': ['subtitle']}, [subtitle]))

    # Fields of these types / with these names are emitted but marked hidden.
    #FIXME(alexander): toc-depth should be int,
    # and bibliography-preamble must die
    hidden_types = (int, Bibliography, Image, Lang)
    hidden_keys = ('toc-depth', 'bibliography-preamble')

    dl_body = []
    for key, value in head.iteritems():
        # fields that were not supplied only carry a default value: skip them
        if 'supplied' not in meta.d[key]:
            continue

        dt_attrs = {'class': [key]}
        if isinstance(value, hidden_types) or key in hidden_keys:
            dt_attrs['hidden'] = ""
        label = meta.d[key].get('label', key.capitalize())
        dl_body.append(mkel('dt', dt_attrs, [lang.localize(label)]))

        # the dd gets its own attribute dict so that `data-value` does not
        # leak into the dt
        dd_attrs = dt_attrs.copy()
        display = unparse_literal(value, roundtrip=False)
        roundtrippable = unparse_literal(value)
        if display != roundtrippable:
            dd_attrs['data-value'] = roundtrippable
        if isinstance(value, bool):
            dd_content = lang.localize(display)
        else:
            dd_content = display
        dl_body.append(mkel('dd', dd_attrs, [dd_content]))

    if dl_body:
        prepend.append(mkel('dl', {'id': 'document-properties'}, dl_body))
    return lang.code, title, prepend
def meta_to_html(meta): # pylint: disable=R0914
    """Turn the document properties in ``meta`` into html to prepend.

    The result is ``(lang.code, title, prepend)``; ``prepend`` lists an
    ``h1`` (title) and ``h2`` (subtitle) when those are non-empty, followed
    by a ``dl`` with id ``document-properties`` containing a ``dt``/``dd``
    pair for every other field whose ``meta.d`` entry has a ``'supplied'``
    key (the ``dl`` is left out when there are no such fields).
    """
    # FIXME(alexander): this is just a really hacky way to convert the
    # document properties into something vaguely visually plausible in a style
    # independent manner
    head = meta.items()
    lang = head['lang']
    prepend = []

    title = head.pop('title', '')
    # FIXME(alexander): should maybe default to docname?
    # the only style which does currently not have a title
    # is letter, so could use subject there
    if title:
        prepend.append(mkel('h1', {'class': ['title']}, [title]))
    subtitle = head.pop('subtitle', '')
    if subtitle:
        prepend.append(mkel('h2', {'class': ['subtitle']}, [subtitle]))

    types_to_omit = (int, Bibliography, Image, Lang)
    #FIXME(alexander): toc-depth should be int,
    # and bibliography-preamble must die
    keys_to_omit = ('toc-depth', 'bibliography-preamble')

    dl_body = []
    for field, val in head.iteritems():
        supplied = 'supplied' in meta.d[field]
        if not supplied:
            # only a default value is present: nothing to show
            continue

        term_attrs = {'class': [field]}
        if isinstance(val, types_to_omit) or field in keys_to_omit:
            term_attrs['hidden'] = ""
        label = meta.d[field].get('label', field.capitalize())
        term = mkel('dt', term_attrs, [lang.localize(label)])
        dl_body.append(term)

        # copy, so that adding `data-value` below leaves the dt untouched
        desc_attrs = term_attrs.copy()
        dd = unparse_literal(val, roundtrip=False)
        roundtrippable = unparse_literal(val)
        if dd != roundtrippable:
            desc_attrs['data-value'] = roundtrippable
        shown = lang.localize(dd) if isinstance(val, bool) else dd
        dl_body.append(mkel('dd', desc_attrs, [shown]))

    if dl_body:
        prepend.append(mkel('dl', {'id': 'document-properties'}, dl_body))
    return lang.code, title, prepend
def xmp_meta(self, head):
    """Build the XMP metadata options for the pdf (via hyperxmp.sty).

    For each of lang, title, author, description, keywords and copyright
    that is present in `head` (description falls back to 'abstract') a
    ``pdf<key>={...}`` entry is produced (description is emitted as
    ``pdfsubject``), followed by a fixed ``pdfcreator`` entry. The entries
    are returned joined by ``',\\n'``.

    Generating well structured pdf output with latex is a fool's errand, so
    there are known shortcomings:

    - dc:language should be of type 'bag'.
    - XMP and info entries aren't synched for
      'xmp:CreateDate'/'CreationDate' and 'pdf:Producer'/'Producer'.
    - For PDF/A we'd also need 'pdfaid:part' and 'pdfaid:conformance'; on
      first sight it looks like hyperxmp.sty takes a hardcoded guess, with
      two possible outcomes: nothing or PDF/A-1b.

    In an ideal world we'd probably only create valid PDF/A-2u or PDF/X
    documents, but both seem pretty much impossible to achieve from latex
    directly: even the trivial metadata above is a pain, and we'd also need
    to deal, at the very least, with ICC Profiles and unicode mappings
    (already somewhat painful for reasons of historical baggage in PDF and
    the font standards it supports, and really horrible in latex because
    e.g. of issues with zero-width glyphs and math characters without
    unicode equivalent). PDF/A-2a would also require tagging.
    """
    fallbacks = {'description': 'abstract'}
    xmps = []
    for k in ('lang', 'title', 'author',
              'description', 'keywords', 'copyright'):
        # pick the head field to read: the key itself or else its fallback
        if k in head:
            source_key = k
        elif k in fallbacks and fallbacks[k] in head:
            source_key = fallbacks[k]
        else:
            continue
        text = unparse_literal(head[source_key], roundtrip=False, plain=True)
        # typesetr uses ';' to separate fields (like keywords or multiple
        # authors), because ',' is often ambiguous where ';' almost never
        # is.
        if k not in self._COMMA_SEPARATED_XMP_FIELDS:
            latex_v = self.latexify(text)
        elif ',' in text:
            # shield literal commas with a private placeholder, turn the
            # ';' separators into ',' and restore the literal commas as
            # \xmpcomma{} after latexification
            shielded = text.replace(',', self._PRIVATE1).replace(';', ',')
            quoted = self.latexify(shielded).replace(
                self._PRIVATE1, r'\xmpcomma{}')
            latex_v = cmd('xmpquote', [], [quoted])
        else:
            latex_v = self.latexify(text.replace(';', ','))
        pdf_key = 'subject' if k == 'description' else k
        xmps.append(raw('pdf%s={%s}' % (pdf_key, latex_v)))
    # FIXME(alexander): append version info?
    xmps.append(raw('pdfcreator={Typesetr}'))
    return ',\n'.join(xmps)
def meta_to_dublin_core(head, modified=None):
    """Build the Dublin Core ``metadata`` element for the fields in `head`.

    Every key of ``HEAD_TO_CORE`` that is present in `head` is emitted as a
    ``dc:<name>`` element:

    - keys listed in ``MARC_RELATOR`` hold people (a ';'-separated string
      or an iterable); each person gets a ``dc:`` element with a generated
      id plus two refining ``meta`` elements (role and file-as)
    - other keys are emitted as plain text (skipped when empty); keys in
      ``TITLE_ELEMENTS`` additionally get ``title-type`` and
      ``display-seq`` refinements

    A ``dcterms:modified`` timestamp (second resolution, 'Z' suffix) is
    appended last; `modified` defaults to the current UTC time.

    Returns ``(('metadata', {}, elements), DUBLIN_META_NS)``.

    `head` must contain a 'uuid' entry (checked by assertion).
    """
    modified = datetime.datetime.utcnow() if modified is None else modified
    ts = modified.isoformat().split('.')[0] + 'Z'
    assert 'uuid' in head
    dublin = []
    head = head.copy()
    dc = 'dc:'.__add__
    ids = Counter()  # per dc element name, number of people emitted so far
    for h, d, v in ((k, d, head[k])
                    for (k, d) in HEAD_TO_CORE.iteritems() if k in head):
        if h in MARC_RELATOR:
            # FIXME(alexander): make authors etc. first class type
            people = (re.split(r'\s*;\s*', v)
                      if isinstance(v, basestring) else v)
            for a in people:
                ids[d] += 1
                person = Person(a)
                pid = "pub-%s-%d" % (d, ids[d])
                # `refines` must be a relative IRI pointing at the refined
                # element, i.e. a '#'-prefixed fragment (as already done
                # for the title refinements below), not the bare id.
                target = '#%s' % pid
                dublin.append(mkel(dc(d), {'id': pid}, [person.display_name]))
                dublin.append(mkel('meta',
                                   {'refines': target,
                                    'property': 'role',
                                    'scheme': 'marc:relators'},
                                   [MARC_RELATOR[h]]))
                dublin.append(mkel('meta',
                                   {'refines': target,
                                    'property': 'file-as'},
                                   [person.sort_name]))
        else:
            unparsed = unparse_literal(v, roundtrip=False, plain=True)
            if not unparsed:
                continue
            attrs = dict(id=h) if h in IDED_ELEMENTS else {}
            dublin.append(mkel(dc(d), attrs, [unparsed]))
            if h in TITLE_ELEMENTS:
                # NOTE: relies on every title element also being listed in
                # IDED_ELEMENTS, otherwise attrs has no 'id'.
                dublin.append(mkel('meta',
                                   {'refines': '#%s' % attrs['id'],
                                    'property': 'title-type'},
                                   [{'title': 'main'}.get(h, h)]))
                dublin.append(mkel('meta',
                                   {'refines': '#%s' % attrs['id'],
                                    'property': 'display-seq'},
                                   [str(TITLE_ELEMENTS.index(h) + 1)]))
    dublin.append(mkel('meta', {'property': 'dcterms:modified'}, [ts]))
    return ('metadata', {}, dublin), DUBLIN_META_NS
def xmp_meta(self, head):
    """Create the hyperxmp.sty options that carry the pdf's XMP metadata.

    Looks up lang, title, author, description (or, failing that, abstract),
    keywords and copyright in `head`; each one found becomes a
    ``pdf<key>={...}`` entry (``pdfsubject`` for description). A constant
    ``pdfcreator`` entry is added at the end and all entries are joined
    with ``',\\n'``.

    Note that generating well structured pdf output with latex is a fool's
    errand, so this has some shortcomings:

    - dc:language should be of type 'bag'.
    - XMP and info entries aren't synched for
      'xmp:CreateDate'/'CreationDate' and 'pdf:Producer'/'Producer'.
    - For PDF/A we'd also need 'pdfaid:part' and 'pdfaid:conformance'; on
      first sight it looks like hyperxmp.sty takes a hardcoded guess, with
      two possible outcomes: nothing or PDF/A-1b.

    In an ideal world we'd probably only create valid PDF/A-2u or PDF/X
    documents, but both seem pretty much impossible to achieve from latex
    directly, with even the trivial metadata stuff above being a pain; we'd
    also need to deal, at the very least, with ICC Profiles and unicode
    mappings (already somewhat painful for reasons of historical baggage in
    PDF and the font standards it supports and really horrible in latex
    because e.g. of issues with zero-width glyphs and math characters
    without unicode equivalent). PDF/A-2a would also require tagging.
    """
    fallbacks = {'description': 'abstract'}
    entries = []
    wanted = 'lang title author description keywords copyright'.split()
    for k in wanted:
        if k in head:
            mk = k
        elif k in fallbacks and fallbacks[k] in head:
            mk = fallbacks[k]
        else:
            # neither the field nor a usable fallback was supplied
            continue
        v = unparse_literal(head[mk], roundtrip=False, plain=True)
        # typesetr uses ';' to separate fields (like keywords or multiple
        # authors), because ',' is often ambiguous where ';' almost never
        # is.
        comma_separated = k in self._COMMA_SEPARATED_XMP_FIELDS
        if comma_separated and ',' in v:
            # literal commas are parked behind a private placeholder while
            # ';' becomes the ',' separator; afterwards the placeholder is
            # swapped for \xmpcomma{}
            v = v.replace(',', self._PRIVATE1).replace(';', ',')
            body = self.latexify(v).replace(self._PRIVATE1, r'\xmpcomma{}')
            latex_v = cmd('xmpquote', [], [body])
        elif comma_separated:
            latex_v = self.latexify(v.replace(';', ','))
        else:
            latex_v = self.latexify(v)
        if k == 'description':
            option = 'subject'
        else:
            option = k
        entries.append(raw('pdf%s={%s}' % (option, latex_v)))
    # FIXME(alexander): append version info?
    entries.append(raw('pdfcreator={Typesetr}'))
    return ',\n'.join(entries)
def odtify(tree, required_styles, images):
    """Serialize `tree` to an odt markup string, dispatching on its type.

    - ``literal.Bibliography``: serialize its ``data``
    - string: handled by ``odtify_basestring``
    - list: concatenation of the serialized items
    - tuple ``(tag, attrs, body)``: delegated to ``HTML_TO_ODT[tag]``
    - ``literal.Image``: handled by ``odt_image``
    - anything else: logged, then ``unparse_literal(tree)`` is serialized

    `required_styles` and `images` are handed through unchanged to the
    handlers and to recursive calls.
    """
    # FIXME(alexander): remove this disgusting crap;
    # how do Images work ATM?
    def recurse(subtree):
        return odtify(subtree, required_styles, images)

    if isinstance(tree, literal.Bibliography):
        result = recurse(tree.data)
    elif isinstance(tree, basestring):
        result = odtify_basestring(tree)
    elif isinstance(tree, list):
        result = "".join(recurse(item) for item in tree)
    elif isinstance(tree, tuple):
        tag, attrs, body = tree
        handler = HTML_TO_ODT[tag]
        result = handler(attrs, body, required_styles, images)
    elif isinstance(tree, literal.Image):
        result = odt_image(tree, images)
    else:
        log.warn('Fallthrough: %r', tree)
        result = recurse(unparse_literal(tree))
    return result
def meta_to_dublin_core(head, modified=None):
    """Create the Dublin Core ``metadata`` element from the fields in `head`.

    For each ``HEAD_TO_CORE`` key found in `head` a ``dc:<name>`` element
    is emitted:

    - for keys in ``MARC_RELATOR`` the value is a list of people (given as
      a ';'-separated string or as an iterable); every person yields a
      ``dc:`` element with a generated id and two ``meta`` elements
      refining it (role and file-as)
    - for all other keys the plain-text value is emitted (empty values are
      skipped); keys in ``TITLE_ELEMENTS`` also get ``title-type`` and
      ``display-seq`` refinements

    Finally a ``dcterms:modified`` timestamp is appended (seconds
    resolution with a 'Z' suffix); `modified` defaults to the current UTC
    time.

    Returns ``(('metadata', {}, elements), DUBLIN_META_NS)``.

    `head` must contain a 'uuid' entry (checked by assertion).
    """
    modified = datetime.datetime.utcnow() if modified is None else modified
    ts = modified.isoformat().split('.')[0] + 'Z'
    assert 'uuid' in head
    dublin = []
    head = head.copy()
    dc = 'dc:'.__add__
    ids = Counter()  # running person count per dc element name
    present = ((k, d, head[k])
               for (k, d) in HEAD_TO_CORE.iteritems() if k in head)
    for h, d, v in present:
        if h in MARC_RELATOR:
            # FIXME(alexander): make authors etc. first class type
            if isinstance(v, basestring):
                people = re.split(r'\s*;\s*', v)
            else:
                people = v
            for a in people:
                ids[d] += 1
                person = Person(a)
                pid = "pub-%s-%d" % (d, ids[d])
                dublin.append(mkel(dc(d), {'id': pid}, [person.display_name]))
                # `refines` has to reference the element by fragment
                # ('#' + id), consistent with the title refinements below;
                # a bare id would be read as a relative resource path.
                refines = '#%s' % pid
                dublin.append(mkel('meta',
                                   {'refines': refines,
                                    'property': 'role',
                                    'scheme': 'marc:relators'},
                                   [MARC_RELATOR[h]]))
                dublin.append(mkel('meta',
                                   {'refines': refines,
                                    'property': 'file-as'},
                                   [person.sort_name]))
        else:
            unparsed = unparse_literal(v, roundtrip=False, plain=True)
            if not unparsed:
                continue
            attrs = dict(id=h) if h in IDED_ELEMENTS else {}
            dublin.append(mkel(dc(d), attrs, [unparsed]))
            if h in TITLE_ELEMENTS:
                # NOTE: assumes title elements are always in IDED_ELEMENTS,
                # since attrs['id'] is required here.
                title_ref = '#%s' % attrs['id']
                dublin.append(mkel('meta',
                                   {'refines': title_ref,
                                    'property': 'title-type'},
                                   [{'title': 'main'}.get(h, h)]))
                dublin.append(mkel('meta',
                                   {'refines': title_ref,
                                    'property': 'display-seq'},
                                   [str(TITLE_ELEMENTS.index(h) + 1)]))
    dublin.append(mkel('meta', {'property': 'dcterms:modified'}, [ts]))
    return ('metadata', {}, dublin), DUBLIN_META_NS
def error(problem, k, supplied=None):
    """Record `problem` as the error entry for meta field `k`.

    Writes into ``errors`` and reads ``self._info``; both come from the
    enclosing scope. Any earlier entry for `k` is replaced. The entry also
    stores the unparsed `supplied` value when one is given and, for fields
    known to ``self._info``, that field's default as the canonical value.
    """
    entry = OrderedDict([('error', problem)])
    errors[k] = entry
    if supplied is not None:
        entry['supplied'] = unparse_literal(supplied)
    if k in self._info:
        entry['canonical'] = self._info[k]['default']
def validate_and_augment(self, meta): # pylint: disable=R0912
    """Validate the supplied `meta` fields and add the unsupplied ones.

    Every supplied field is checked against ``self._info``: unknown fields
    and values of the wrong type are reported as errors, text values are
    parsed into the field's declared type, and rich-text values supplied
    for a field of another type are first reduced to plain text (this
    replaces the value in `meta` itself). Required fields (other than
    'title') that are missing are reported as errors as well.

    Returns a ``MetaInfo`` wrapping an ordered mapping that has, for the
    supplied fields followed by the remaining fields of ``self._info``,
    either the error entry or a dict with the 'supplied' value and, when
    it differs, the 'canonical' one; 'label' and 'type' are added when they
    differ from the defaults.
    """
    parsed = {}
    errors = OrderedDict()

    def error(problem, k, supplied=None):
        # Record `problem` for field `k`, replacing any earlier entry.
        errors[k] = OrderedDict([('error', problem)])
        if supplied is not None:
            errors[k]['supplied'] = unparse_literal(supplied)
        if k in self._info:
            errors[k]['canonical'] = self._info[k]['default']

    def check_supplied(): # pylint: disable=R0912
        def try_to_reify(v, parse):
            # `k` and `right_type` are the current values of the loop below.
            try:
                return parse(v)
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception as ex:
                # pylint: disable=W0631
                log.info('Meta conversion error on %s, %s', k, ex)
                error('Not a valid %s format (expected %s)' % (
                    right_type, TYPE_EXAMPLES[right_type]),
                      k, supplied=meta[k])

        for k in meta:
            if k not in self._info:
                maybe_meants = spellsuggest.spell_suggest(
                    k, self._info.keys())
                suggestion = (" (did you mean '%s'?)" % maybe_meants[0]
                              if maybe_meants else '')
                if k not in ('title', 'subtitle'):
                    error("Unexpected field '%s'%s" % (k, suggestion),
                          k, meta[k])
                else:
                    error("This document type does not have a %s" % k,
                          k, meta[k])
                continue
            potential_types = PY_TYPE_TO_TYPESETR_TYPES[type(meta[k])]
            right_type = self._info[k]['type']
            if right_type in potential_types:
                # already of a fitting type; only bibliographies need to
                # be turned into their dedicated representation
                if right_type == 'bibliography':
                    parsed[k] = try_to_reify(meta[k], Bibliography)
            else:
                if 'rich-text' in potential_types:
                    # flatten rich text, then treat it like plain text
                    if not isinstance(meta[k], basestring):
                        meta[k] = postprocess.plaintextify(meta[k])
                    potential_types = ('text',)
                if potential_types == ('text',):
                    parsed[k] = try_to_reify(
                        meta[k],
                        # pylint: disable=W0640
                        lambda v: parse_literal(v, right_type))
                else:
                    error("Expected meta field '%s:' to be"
                          " of type '%s', not '%s'" % (
                              k, right_type, potential_types[0]),
                          k, supplied=meta[k])

    def check_required():
        for k in self._required:
            if k == 'title':
                continue # see __init__ comment
            ## assert 'default' not in self._info[k], \
            ##     "metadata.yml error -- %s has a required default" % k
            if k not in meta:
                right_type = self._info[k]['type']
                if right_type == 'text':
                    # The error must be filed under the field's own key
                    # (with an empty supplied value); filing it under ''
                    # meant it was never reported.
                    error('This field is required', k, supplied='')
                else:
                    error('This field (of type %s)'
                          ' is required' % right_type, k)

    check_supplied()
    check_required()

    meta_ans = OrderedDict()
    unsupplied = [name for name in self._info.keys() if name not in meta]
    for k in list(meta.keys()) + unsupplied:
        if k not in errors:
            meta_ans[k] = OrderedDict()
            if k in meta:
                meta_ans[k]['supplied'] = unparse_literal(meta[k])
            # the canonical form is a string representation (for now) but
            # we don't just want to use the string that was supplied (e.g.
            # 'YES'), we want to canonicalize it. We achieve that by
            # unparsing the parsed version if any; unparsing a string is
            # idempotent hence values that are already strings are left as
            # is
            canonical = unparse_literal(
                parsed.get(k, meta.get(k, self._defaults[k])))
            if canonical != meta_ans[k].get('supplied'):
                meta_ans[k]['canonical'] = canonical
        else:
            meta_ans[k] = errors[k]
        if (self._info.get(k, {}).get('label', self.default_label(k)) !=
                self.default_label(k)): # pylint: disable=C0330
            meta_ans[k]['label'] = self._info[k]['label']
        if k in self._info and self._info[k]['type'] != 'text':
            meta_ans[k]['type'] = self._info[k]['type']
    return MetaInfo(meta_ans)