def modify_fonts(font_names, presentation='emoji', output=None, suffix=None,
                 dst_dir=None, vs_added=None):
  """Add a variation-selector (format 14) cmap to each named font.

  Validates every font first: if any input font already has a format 14
  cmap, nothing is modified.  Each surviving font is passed to modify_font
  with the Unicode emoji variant set (optionally extended by vs_added) and
  saved into dst_dir, which is created if needed.

  Args:
    font_names: paths of the fonts to modify.
    presentation: presentation style passed to modify_font ('emoji' default).
    output: explicit output file name; only allowed with a single input font.
    suffix: inserted before the extension of each derived output name
      (ignored when output is given).
    dst_dir: required output directory.
    vs_added: optional extra variation-sequence codepoints to union in.

  Raises:
    ValueError: if any input font already has a format 14 cmap.
  """
  assert dst_dir
  if output:
    assert len(font_names) == 1

  # Load and validate everything up front so we process no font if any
  # already has a var selector cmap.  Keep the loaded fonts so we don't
  # pay for a second ttLib.TTFont parse of each file below.
  fonts = []
  for font_name in font_names:
    font = ttLib.TTFont(font_name)
    if font_data.get_variation_sequence_cmap(font):
      raise ValueError('font %s already has a format 14 cmap' % font_name)
    fonts.append((font_name, font))

  if not path.exists(dst_dir):
    os.makedirs(dst_dir)

  emoji_variants = unicode_data.get_unicode_emoji_variants()
  if vs_added:
    emoji_variants = emoji_variants | vs_added

  for font_name, font in fonts:
    modify_font(font_name, font, presentation, emoji_variants)
    if output:
      new_name = output
    else:
      new_name = path.basename(font_name)
      if suffix:
        name, ext = path.splitext(new_name)
        new_name = name + suffix + ext
    font.save(path.join(dst_dir, new_name))
def _create_flag_sets(data_dir):
  """Returns map from flag name to pairs of cp_set, boolean.  These get
  added to a codepoint name if the boolean matches the result of
  'cp in cp_set'."""
  # These are hardcoded for now, should be able to specify on
  # command line... (TODO)

  # I propose supporting some emoji in Noto even if they don't have text
  # variation sequences proposed, we can remove those for Android if they
  # disagree.
  emoji_only = (
      unicode_data.get_emoji()
      - unicode_data.get_unicode_emoji_variants('proposed_extra'))

  # Codepoints actually covered by the current Symbols2 font binary.
  shipped_font = path.join(data_dir, 'NotoSansSymbols2-Regular.ttf')
  shipped_codes = CodeList.fromfontcmap(shipped_font).codeset()

  # Codepoints the Symbols2 cmap data file says should be covered.
  cmap_file = path.join(data_dir, 'notosanssymbols2_cmap.txt')
  with open(cmap_file, 'r') as f:
    expected_codes = tool_utils.parse_int_ranges(f.read())

  # Expected but not yet in the shipped font.
  pending_codes = expected_codes - shipped_codes

  # True means set flag if cp in set, False means set if not in set
  return {
      'ref only': (expected_codes, False),
      'emoji only': (emoji_only, True),
      'add': (pending_codes, True),
  }
def _create_codeset_from_expr(expr_list, flag_sets, data_dir, codelist_map): """Processes expr_list in order, building a codeset. See _read_flag_data_from_file for information on expr_list. This can modify flag_sets and codelist_map.""" result = () for op, exp in expr_list: if exp not in flag_sets: # its a codelist codes = _load_codelist(exp, data_dir, codelist_map).codeset() else: codes_or_spec = flag_sets[exp] if isinstance(codes_or_spec, (set, frozenset)): codes = codes_or_spec else: # replace the spec with the actual codes if codes_or_spec == None: # we only know about '_emoji_' and '_math_' if exp == '_emoji_': codes = ( unicode_data.get_emoji() - unicode_data.get_unicode_emoji_variants('proposed_extra')) elif exp == '_math_': codes = unicode_data.chars_with_property('Math') else: raise Exception('unknown special codeset "%s"' % exp) else: codes = _load_codelist( codes_or_spec, data_dir, codelist_map).codeset() flag_sets[exp] = codes if op == '|': if not result: # it appers that python 'optimizes' |= by replacing the lhs by rhs if # lhs is an empty set, but this changes the type of lhs to frozenset... result = set(codes) else: result |= codes elif op == '&': result &= codes elif op == '-': result -= codes else: raise Exception('unknown op "%s"' % op) return result
def subset_symbols(srcdir, dstdir): """Subset Noto Sans Symbols in a curated way. Noto Sans Symbols is now subsetted in a curated way. Changes include: * Currency symbols now included in Roboto are removed. * All combining marks for symbols (except for combining keycap) are added, to combine with other symbols if needed. * Characters in symbol blocks that are also covered by Noto CJK fonts are added, for better harmony with the rest of the fonts in non-CJK settings. The dentistry characters at U+23BE..23CC are not added, since they appear to be Japan-only and full-width. * Characters that UTR #51 defines as default text are added, although they may also exist in the color emoji font, to make sure they get a default text style. * Characters that UTR #51 defines as default emoji are removed, to make sure they don't block the fallback to the color emoji font. * A few math symbols that are currently included in Roboto are added, to prepare for potentially removing them from Roboto when they are lower-quality in Roboto. Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts.""" # TODO see if we need to change this subset based on Noto Serif coverage # (so the serif fallback chain would support them) target_coverage = set() # Add all characters in BLOCKS_TO_INCLUDE for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE): target_coverage.update(range(first, last + 1)) # Add one-off characters target_coverage |= ONE_OFF_ADDITIONS # Remove characters preferably coming from Roboto target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO # Remove default emoji presentation (including ones Android prefers default) target_coverage -= EMOJI # Remove COMBINING ENCLOSING KEYCAP. 
It's needed for Android's color emoji # mechanism to work properly target_coverage.remove(0x20E3) # Remove dentistry symbols, as their main use appears to be for CJK: # http://www.unicode.org/L2/L2000/00098-n2195.pdf target_coverage -= set(range(0x23BE, 0x23CC + 1)) for font_file in glob.glob(path.join(srcdir, 'NotoSansSymbols-*.ttf')): print 'main subset', font_file out_file = path.join(dstdir, path.basename(font_file)[:-4] + '-Subsetted.ttf') subset.subset_font(font_file, out_file, include=target_coverage) # The second subset will be a fallback after the color emoji, for # explicit text presentation sequences. target_coverage = EMOJI | unicode_data.get_unicode_emoji_variants() for font_file in glob.glob(path.join(srcdir, 'NotoSansSymbols-*.ttf')): print 'secondary subset', font_file out_file = path.join(dstdir, path.basename(font_file)[:-4] + '-Subsetted2.ttf') subset.subset_font(font_file, out_file, include=target_coverage)
# NOTE(review): this re-defines subset_symbols; an earlier, near-identical
# definition appears above in this file and is shadowed by this one.
# Presumably one of the two copies should be removed — confirm intent.
def subset_symbols(srcdir, dstdir):
  """Subset Noto Sans Symbols in a curated way.

  Noto Sans Symbols is now subsetted in a curated way. Changes include:

  * Currency symbols now included in Roboto are removed.
  * All combining marks for symbols (except for combining keycap) are
    added, to combine with other symbols if needed.
  * Characters in symbol blocks that are also covered by Noto CJK fonts
    are added, for better harmony with the rest of the fonts in non-CJK
    settings. The dentistry characters at U+23BE..23CC are not added,
    since they appear to be Japan-only and full-width.
  * Characters that UTR #51 defines as default text are added, although
    they may also exist in the color emoji font, to make sure they get
    a default text style.
  * Characters that UTR #51 defines as default emoji are removed, to
    make sure they don't block the fallback to the color emoji font.
  * A few math symbols that are currently included in Roboto are added,
    to prepare for potentially removing them from Roboto when they are
    lower-quality in Roboto.

  Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts."""
  # TODO see if we need to change this subset based on Noto Serif coverage
  # (so the serif fallback chain would support them)
  target_coverage = set()
  # Add all characters in BLOCKS_TO_INCLUDE
  for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
    target_coverage.update(range(first, last+1))
  # Add one-off characters
  target_coverage |= ONE_OFF_ADDITIONS
  # Remove characters preferably coming from Roboto
  target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
  # Remove default emoji presentation (including ones Android prefers default)
  target_coverage -= EMOJI
  # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
  # mechanism to work properly
  target_coverage.remove(0x20E3)
  # Remove dentistry symbols, as their main use appears to be for CJK:
  # http://www.unicode.org/L2/L2000/00098-n2195.pdf
  target_coverage -= set(range(0x23BE, 0x23CC+1))
  for font_file in glob.glob(path.join(srcdir, 'NotoSansSymbols-*.ttf')):
    print 'main subset', font_file
    out_file = path.join(
        dstdir, path.basename(font_file)[:-4] + '-Subsetted.ttf')
    subset.subset_font(font_file, out_file, include=target_coverage)
  # The second subset will be a fallback after the color emoji, for
  # explicit text presentation sequences.
  target_coverage = EMOJI | unicode_data.get_unicode_emoji_variants()
  for font_file in glob.glob(path.join(srcdir, 'NotoSansSymbols-*.ttf')):
    print 'secondary subset', font_file
    out_file = path.join(
        dstdir, path.basename(font_file)[:-4] + '-Subsetted2.ttf')
    subset.subset_font(font_file, out_file, include=target_coverage)