def merge_ams(amsmath, amshelp, lshort):
    """Merge the ``amsmath`` and ``amshelp`` mappings into ``lshort`` in place.

    Names missing from ``lshort`` are inserted as-is (the very same entry
    object, not a copy).  For names already present, only fields that the
    ``lshort`` entry already carries are reconciled:

    * fields other than ``name`` and ``meta`` are combined through ``enlist``;
    * a non-``None`` ``meta`` contributes just the keys not yet present;
    * any remaining differing value is asserted to be ``None``.

    Returns ``None``.
    """
    for name, incoming in chain(amsmath.items(), amshelp.items()):
        if name not in lshort:
            lshort[name] = incoming
            continue
        existing = lshort[name]
        for field, value in incoming.items():
            # Fields unknown to the existing entry are left out entirely.
            if field not in existing:
                continue
            held = existing[field]
            # A plain ``in`` test is not enough because some values are None.
            if is_seq(held):
                already_there = value in held
            else:
                already_there = value == held
            if already_there:
                continue
            if field not in ('name', 'meta'):
                existing[field] = enlist(value, held)
            elif field == 'meta' and value is not None:
                if existing['meta'] is None:
                    existing['meta'] = {}
                meta = existing['meta']
                for meta_key, meta_val in value.items():
                    # Keys already present keep their value; no concat logic
                    # exists for clashes (none occur at present).
                    meta.setdefault(meta_key, meta_val)
            else:
                assert value is None
def make_packs(MJ):
    """Build the per-category and per-package tables.

    ``MJ`` is a mapping of entry name to entry dict; every entry is read
    through its ``'type'`` key.  The module-level ``RF`` table supplies the
    second source.  Returns the tuple ``(allcats, packages)``.
    """
    # Base structure comes from ``packages`` in ``get_cwl``.
    packages, plut = packs_by_cat()
    #
    # XXX - See todo in front matter.
    #
    # ``misc-web`` and ``misc-other`` catch-all packages.  ``plut()`` stays
    # ignorant of these, so they must be queried separately.
    web = {'environments': {}, 'commands': {}, 'options': {}}
    other = {'environments': {}, 'commands': {}, 'options': {}}
    packages['misc-web'] = web
    packages['misc-other'] = other
    #
    # Category changes: ``filetypes`` -> ``files``, ``unknowns`` -> (removed)
    allcats = {k: {} for k in RF if k not in ('unknowns', 'files')}
    allcats['encodings'] = {}
    allcats['filetypes'] = {}
    # Plural category name -> singular type name; filled in lazily.
    singular = {'classes': 'class'}
    # Categories that also absorb entries of one additional type.
    extra_types = {'commands': 'font', 'filetypes': 'extension'}
    for category in allcats:
        if category == 'encodings':
            rf = {}
        elif category == 'filetypes':
            rf = RF['files']
        else:
            rf = RF[category]
        wanted = singular.setdefault(category, category.rstrip('s'))
        mj = {
            name: val for name, val in MJ.items()
            if enlist(val['type'], val['type']) == enlist(wanted, val['type'])
        }
        #
        extra = extra_types.get(category)
        if extra is not None:
            mj.update(
                (name, val) for name, val in MJ.items()
                if val['type'] == extra)
        #
        # Consolidate entry vals in cat and package dicts.
        bucket = allcats[category]
        for key in rf.keys() | mj.keys():
            built = build_entry(category, key, rf, mj)
            bucket[key] = built
            # The package side works on a shallow copy of the entry.
            update_packs(packages, category, key, web, other,
                         dict(built), plut(category, key))
    #
    fix_modes(packages, inspect=False)
    add_classes(packages, allcats)
    return allcats, packages
def fix_modes(packages, inspect=True):
    """Fill in missing ``mode`` values with a best guess.

    Only the ``commands`` and ``environments`` sections of each package are
    visited.  With ``inspect`` false, an entry with an empty mode gets
    ``['math', 'text']`` when it is a command of a package whose name
    contains ``math``, and ``['text']`` otherwise; the function then returns
    ``None``.  With ``inspect`` true, empty modes are left untouched and an
    ``inspect_modes`` helper is returned for listing what was found.
    """
    # XXX - Absent some reliable, authoritative data source, mislabeled modes
    #       are here to stay.  Scraping docs is not an option and relying on
    #       (crowdsourced) cwl classifiers lacking.
    #
    # Collected for the inspection helper below.
    without_mode = []
    with_mode = []
    # Lame kludge to manually tweak modes mislabeled by KaTeX or MathJax.
    force_text = ('color', )
    mode_cats = 'commands environments'.split()
    #
    for pack, pdata in packages.items():
        if not pdata:
            continue
        for cat, cdata in pdata.items():
            if not cdata or cat not in mode_cats:
                continue
            for entry, edata in cdata.items():
                # Snapshot is taken before any forced tweak below.
                record = (pack, cat, entry, edata['mode']) if inspect else None
                if edata['mode']:
                    if pack in force_text:
                        edata['mode'] = enlist(edata['mode'], 'text',
                                               ret_type=list)
                    if inspect:
                        with_mode.append(record)
                elif inspect:
                    without_mode.append(record)
                elif 'math' in pack and cat == 'commands':
                    edata['mode'] = ['math', 'text']
                else:
                    edata['mode'] = ['text']

    #
    def inspect_modes(withmode=True, ret=False):
        chosen = with_mode if withmode else without_mode
        if ret:
            return chosen
        row = '{:<{w}}{:<{w}}{:<{w}}{!r:<{w}}'
        for tup in chosen:
            labels = [s + ': ' for s in tup[:-2]]
            print(row.format(*labels, *tup[-2:], w=20))
        print('\n%s items found.' % len(chosen))

    #
    if inspect:
        return inspect_modes
def apply_classifiers(indict, classifier):
    """Record what ``classifier`` says about an entry in ``indict``.

    Does nothing when ``classifier`` is ``None``.  Otherwise ``indict`` is
    updated in place: an ``env_aliases`` or ``environments`` list is stored
    when the classifier matches the corresponding test, and ``mode`` gains
    ``'math'`` and/or the matching mode flags.  Always returns ``None``.
    """
    if classifier is None:
        return
    # Bind ``classifier`` as the first argument of the test function.
    from functools import partial
    matches = partial(test_classifier, classifier)
    #
    keys = get_tests()
    saw_math = False
    # Tried in order; only the first matching kind is applied.
    for field, prefix in (('env_aliases', '\\'), ('environments', '/')):
        if not matches(getattr(keys, field)):
            continue
        previous = indict.get(field)
        names = classifier.lstrip(prefix).split(',')
        assert previous is None or set(previous) == set(names)
        indict[field] = names
        if 'math' not in names:
            return
        saw_math = True
        break
    if saw_math:
        mode = indict.setdefault('mode', [])
        if 'math' not in mode:
            mode.append('math')
            mode.sort()
        return
    modes = keys._fields[:4]
    hits = tuple(matches(getattr(keys, m)) for m in modes)
    # According to the spec, these mode flags are mutually exclusive, so
    # appending isn't strictly necessary.
    if not any(hits):
        return
    mode = indict.setdefault('mode', [])
    valid_modes = keys._fields[:2]
    for flag, hit in zip(modes, hits):
        if not hit:
            continue
        if flag not in valid_modes:
            # Flags outside the first two fields are not valid modes; they
            # are stored in ``environments`` instead.
            existing = indict.setdefault('environments', [flag])
            if existing != [flag]:
                # Never runs as of initial commit.
                indict.update(
                    environments=enlist(existing, flag, ret_type=list))
        elif flag not in mode:
            mode.append(flag)
            mode.sort()
def _symbol_from_doc(docstr):
    """Return the character for the first ``U+XXXX`` token in ``docstr``.

    Accepts four to six hex digits, case-insensitively.  Returns ``None``
    when ``docstr`` is empty, holds no well-formed token, or names a value
    beyond the Unicode range.
    """
    import re
    if not docstr:
        return None
    found = re.search(r'u\+([0-9a-f]{4,6})(?![0-9a-f])', docstr,
                      re.IGNORECASE)
    if found is None:
        return None
    codepoint = int(found.group(1), 16)
    if codepoint > 0x10FFFF:
        return None
    return chr(codepoint)


def build_entry(category, key, rf_dict, mj_dict):
    """Consolidate the two source records for ``key`` into one entry dict.

    Parameters:
        category: category name; ``'commands'`` additionally gets a
            ``symbol`` item.
        key: entry name, looked up in both source mappings.
        rf_dict, mj_dict: mappings of name -> record.  A missing record is
            treated as empty.

    Returns:
        A dict with ``name``, ``mode`` (sorted list), ``type`` (a single
        value, or a sorted list when zero or several remain), ``meta``
        (union of both records' meta), ``info`` (from
        ``bake_info_string``) and, for commands, ``symbol``.

    Raises:
        Exception: when both records define the same meta key.
        AssertionError: when the two records disagree on ``type`` in a way
            that is not one of the known pairings.
    """
    rf = rf_dict.get(key, {})
    mj = mj_dict.get(key, {})
    vdict = {'name': key, 'mode': None, 'type': None, 'meta': {}}
    # ``meta`` may be stored as an explicit None; normalise it once so every
    # use below is safe.
    rf_meta = rf.get('meta') or {}
    mj_meta = mj.get('meta') or {}
    if category == 'commands':
        sym = mj.get('symbol')
        # Some refman @item commands have unintegrated unicode symbols...
        if not sym:
            # Parsed numerically rather than by evaluating a ``"\uXXXX"``
            # literal, which only handles exactly four hex digits and
            # fails on any stray "u+" in the text.
            sym = _symbol_from_doc(rf_meta.get('doc')) or sym
        vdict.update(symbol=sym)
    # Compare types...
    ty = set(enlist(rf.get('type'), mj.get('type'))) - {None}
    if rf and mj and len(ty) > 1:
        # lshort classifies `{\\sl ...}`-like commands as fonts...
        if ty == {'command', 'font'}:
            ty.remove('font')
        # Filetype: lshort uses "extension" while refman uses "file"
        elif ty == {'file', 'extension'}:
            ty = {'filetype'}
        else:
            assert ty == {'environment', 'package'}
    if ty & {'file', 'extension'}:
        ty = {'filetype'}
    # Sorted so that a multi-valued type has a reproducible order.
    vdict['type'] = ty.pop() if len(ty) == 1 else sorted(ty)
    # Combine modes if not equal...  (a None mode counts as empty)
    vdict['mode'] = sorted(
        set(rf.get('mode') or ()) | set(mj.get('mode') or ()))
    # Consolidate meta items...
    if mj_meta.keys() & rf_meta.keys():
        raise Exception('MetaKey Conflict:\n%s\n%s\n%s'
                        % (key, mj_meta, rf_meta))
    outM = dict(mj_meta)
    outM.update(rf_meta)
    vdict['meta'] = outM
    # Stringify curated meta items...
    vdict['info'] = bake_info_string(outM)
    return vdict
    # NOTE(review): the statements below follow the ``return`` and never
    # run.  Kept as found; confirm whether they were meant to execute
    # before the return or should be deleted.
    #
    # Remove dups from unicode-math list, for outputting difference below.
    if key in UD:
        UD.pop(key)
def update_packs(packages, category, key, web, other, entry, packnames):
    """File ``entry`` for ``key`` under the packages that claim it.

    When ``packnames`` is empty the entry goes to one of the catch-all
    tables: ``web`` if its meta carries ``mathjax`` or ``katable``,
    otherwise ``other`` (options are stored as sorted name lists keyed by
    command).  When ``packnames`` is non-empty, each named package's
    existing record for ``key`` gets its ``info``, ``symbol`` and ``mode``
    refreshed from ``entry``.

    ``entry`` is mutated: on the catch-all path ``meta`` and ``type`` are
    popped; on the package path the known meta keys are popped and the
    remainder is asserted empty.  ``packages``, ``web`` and ``other`` are
    updated in place.  Returns ``None``.

    For ``category == 'options'`` each item of ``packnames`` is unpacked
    as a ``(package, command)`` pair.
    """
    # If changing this string, do the same in ``add_classes``...
    infoblurb = 'See "{}" in latexrefman ({}).'
    # These are universal ``\\documentclass`` options that weren't labeled as
    # such in ``get_refman``. Even though they're (correctly) claimed by other
    # more specialized classes, add them to ``misc-other``.
    if category == 'options':
        if entry['meta'].get('command') == '\\documentclass':
            if ('package' in entry['meta'] and
                    not entry['meta']['package'].startswith('class')):
                entry['meta'].pop('package')
            docopts = other[category].setdefault('\\documentclass', [])
            if key not in docopts:
                docopts.append(key)
                docopts.sort()
    if not packnames:
        # Lots of spam like ``en dash``, etc., are unneeded.
        if category == 'commands' and not key.startswith('\\'):
            return
        # This can't go in outer block because "else" uses signature.
        if 'refman' in entry['meta']:
            blurb = infoblurb.format(key, entry['meta']['refman'])
            # Info strings mentioning deprecation/obsolescence are kept
            # instead of being replaced by the generic blurb.
            if all(s not in entry['info'].lower()
                   for s in ('deprecate', 'obsolete')):
                entry['info'] = blurb
        # XXX - Empty info strings should really be addressed in source scripts
        if entry['info'] == '':
            entry.update(info=None)
        # Add info string to package -> info...
        if entry['type'] == 'package':
            if key in packages and entry['info']:
                # ``blurb`` is only bound when ``refman`` is present, hence
                # the assertion before it is used.
                assert 'refman' in entry['meta']
                # blurb = infoblurb.format(key, entry['meta']['refman'])
                packages[key].update(info=blurb)
        # Deal with the special case of options...
        if category == 'options':
            mcom = entry['meta'].get('command')
            mpack = entry['meta'].get('package')
            if mpack:
                assert mcom is None
                packopts = packages[mpack][category]
                optdict = packopts.setdefault('\\usepackage', {})
                if key not in optdict:
                    optdict.setdefault(key, None)
                return
        # Toss these in ``misc-web``, otherwise ``misc-other``.
        if any(k in entry['meta'] for k in ('mathjax', 'katable')):
            assert category != 'options'
            if category in web:
                assert key not in web[category]
                if key in ('\\begin', '\\end'):
                    # XXX - This should probably go elsewhere. Add missing
                    # ``\\begin`` and ``\\end`` commands to ``latex-document``
                    entry.update(mode=['math', 'text'])
                    packages['latex-document']['commands'][key] = entry
                else:
                    web[category][key] = entry
        elif category in other:
            if category == 'options':
                # ``mcom`` was bound in the options block above.
                assert mcom is not None
                optlist = other[category].setdefault(mcom, [])
                if key not in optlist:
                    optlist.append(key)
                    optlist.sort()
                return
            assert key not in other[category]
            other[category][key] = entry
        # Ditch all meta items. For these, they are redundant.
        entry.pop('meta')
        entry.pop('type')
        return
    assert category != 'packages'
    for pname in packnames:
        # Deal with the special case of options first...
        if category == 'options':
            pname, pcmd = pname
            mcom = entry['meta'].get('command')
            mpack = entry['meta'].get('package')
            packopts = packages[pname][category]
            # Currently, ``packages -> package -> options`` are just lists, so
            # can't add any reference info... XXX - Consider changing this...
            if not mpack:
                assert mcom and mcom == pcmd
                continue
            assert mpack != pname and mcom and (mpack, mcom) not in packnames
            # XXX - Seems above is a roundabout way of asserting this...
            assert mcom != '\\usepackage'
            # When above assertion fails, must use dict instead of list...
            optlist = packages[mpack][category].setdefault(mcom, [])
            if key not in optlist:
                optlist.append(key)
                optlist.sort()
            continue
        pack = packages[pname][category][key]
        # Prefer signatures from cwl, otherwise refman reference...
        if is_seq(pack['sig']):
            sig = '\n'.join(pack['sig']) + '\n'
        elif pack['sig'] is not None:
            sig = pack['sig'] + '\n'
        else:
            sig = ''
        #
        # XXX - These blurbs don't add any real value. Ultimately, need to
        # get more accurate docstrings or forgo the info/preview feature.
        pack.update(info=None)
        if 'refman' in entry['meta']:
            assert all(s not in entry['info'].lower()
                       for s in ('deprecate', 'obsolete'))
            blurb = infoblurb.format(key, entry['meta']['refman'])
            pack.update(info=(sig + blurb))
            # if len(key) + 1 < len(entry['info']):
            #     if 'See docs' in entry['info']:
            #         pack.update(info=entry['info'])
            #     else:
            #         pack.update(info=(entry['info'] + '\n' + blurb))
            # else:
            #     pack.update(info=(sig + blurb))
        # XXX - The catch-all amalgam of all meta nonsense mainly applies to
        # math commands and is pretty annoying. If searching docstrings
        # were somehow an option, perhaps they'd be justified...
        #
        # elif entry['info']:
        #     pack.update(info=(sig + entry['info']))
        #
        # Update ``symbol`` and ``mode`` vals...
        if 'symbol' in pack and pack['symbol'] is None:
            pack.update(symbol=entry['symbol'])
        if pack['mode']:
            pack.update(
                mode=enlist(pack['mode'], entry['mode'], ret_type=list))
        else:
            pack.update(mode=entry['mode'])
    # XXX - Ensure all meta keys ticked during reckoning. Delete if consistent,
    # but save list; errant keys often added carelessly in dependencies...
    mkeys = ('alt_doc ams atom codepoints command doc katable lshort '
             'mathjax package pre refman speaktext uniname')
    for mkey in mkeys.split():
        if mkey in entry['meta']:
            entry['meta'].pop(mkey)
    # Anything left over is a meta key this function does not know about.
    assert not len(entry['meta'])
return tuple(replace_atoms(a) for a in atom) return atom.capitalize() # Merge KaTeX symbols.js stuff first master_base = dict(lshort_src) ktsyms_chck = set(ktsyms_names) for grpname, grpdict in ktsyms_src.items(): for kname, vdict in grpdict.items(): # Deal with dupllicates first. if kname in ktsyms_chck: ktsyms_chck.remove(kname) else: # Merge modes when duplicates found. if master_base[kname]['mode'] != vdict['mode']: nmode = enlist(vdict['mode'], master_base[kname]['mode']) master_base[kname].update(mode=nmode) # XXX - Justify why this exists, else delete... if ('katable' in master_base[kname]['meta'] and master_base[kname]['meta']['katable'] != grpname and 'Misc_' not in grpname): print('Cap. conflict w. %s: %s(new) -> %s(existing)' % (kname, grpname, master_base[kname]['meta']), file=sys.stderr) raise SystemExit continue # Change mode type to tuple in ktsyms. vdict.update(mode=enlist(vdict['mode'], vdict['mode'])) # Add missing 'type' key as 'command': vdict.update([('type', 'command')]) # Remove KaTeX font data