def test_parse_code_ranges(self):
    """Checks _parse_code_ranges on one range line and one single-code line."""
    sample_text = (
        '0000..001F ; Common # Cc [32] <control-0000>..<control-001F>\n'
        '0020 ; Common # Zs SPACE\n')
    # Each parsed entry is a (first, last, value) tuple; a single code point
    # is expected to come back as a one-element range.
    expected_ranges = [(0, 31, 'Common'), (32, 32, 'Common')]
    parsed_ranges = unicode_data._parse_code_ranges(sample_text)
    self.assertEqual(expected_ranges, parsed_ranges)
def main(argv):
    """Writes the curated subset of the Noto Symbols font.

    Args:
      argv: command-line style argument list; argv[1] is the source font
        file name and argv[2] is the target (subsetted) font file name.
    """
    # Start from every code point listed in BLOCKS_TO_INCLUDE.
    coverage = {
        code_point
        for first, last, _ in unicode_data._parse_code_ranges(
            BLOCKS_TO_INCLUDE)
        for code_point in range(first, last + 1)
    }

    # One-off characters are added on top of the block ranges.
    coverage |= ONE_OFF_ADDITIONS

    # Letterlike characters should preferably come from Roboto.
    coverage -= LETTERLIKE_CHARS_IN_ROBOTO

    # Characters that are supposed to default to emoji are left out.
    coverage -= BMP_DEFAULT_EMOJI | ANDROID_EMOJI

    # Dentistry symbols are dropped, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    dentistry_first, dentistry_last = 0x23BE, 0x23CC
    coverage.difference_update(range(dentistry_first, dentistry_last + 1))

    # COMBINING ENCLOSING KEYCAP must be absent for Android's color emoji
    # mechanism to work properly. remove() raises KeyError if it was never
    # part of the coverage.
    combining_enclosing_keycap = 0x20E3
    coverage.remove(combining_enclosing_keycap)

    source_font, target_font = argv[1], argv[2]
    subset.subset_font(source_font, target_font, include=coverage)
def test_parse_code_ranges(self):
    """Verifies the tuples _parse_code_ranges produces for sample input."""
    range_line = (
        '0000..001F ; Common # Cc [32] <control-0000>..<control-001F>\n')
    single_line = '0020 ; Common # Zs SPACE\n'
    # A range line and a single-code-point line, fed in as one text blob.
    actual = unicode_data._parse_code_ranges(range_line + single_line)
    self.assertEqual([(0, 31, 'Common'), (32, 32, 'Common')], actual)
def _symbol_set():
    """Returns the set of characters Noto Symbols should support.

    The set is the code points listed in noto_data.SYMBOL_RANGES_TXT,
    restricted to unicode_data.defined_characters(). The result is stored in
    the module-level _SYMBOL_SET and reused while that value is truthy; a
    falsy cached value (including an empty set) is recomputed on each call.
    """
    global _SYMBOL_SET
    if _SYMBOL_SET:
        return _SYMBOL_SET

    listed_symbols = code_range_to_set(
        unicode_data._parse_code_ranges(noto_data.SYMBOL_RANGES_TXT))
    _SYMBOL_SET = listed_symbols & unicode_data.defined_characters()
    return _SYMBOL_SET
def main(argv):
    """Writes two subsets of the Noto Symbols font.

    Args:
      argv: command-line style argument list. argv[1] is the source font
        file name, argv[2] is the target file name for the main subset, and
        argv[3] is the target file name for the second, emoji-only subset.
    """
    # Start from every code point listed in BLOCKS_TO_INCLUDE.
    coverage = {
        code_point
        for first, last, _ in unicode_data._parse_code_ranges(
            BLOCKS_TO_INCLUDE)
        for code_point in range(first, last + 1)
    }

    # One-off characters are added on top of the block ranges.
    coverage |= ONE_OFF_ADDITIONS

    # Letterlike characters should preferably come from Roboto.
    coverage -= LETTERLIKE_CHARS_IN_ROBOTO

    # Characters that are supposed to default to emoji are left out of the
    # main subset; the same set later becomes the second subset's coverage.
    emoji_coverage = DEFAULT_EMOJI | get_android_emoji()
    coverage -= emoji_coverage

    # Dentistry symbols are dropped, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    dentistry_first, dentistry_last = 0x23BE, 0x23CC
    coverage.difference_update(range(dentistry_first, dentistry_last + 1))

    # COMBINING ENCLOSING KEYCAP must be absent for Android's color emoji
    # mechanism to work properly. remove() raises KeyError if it was never
    # part of the coverage.
    combining_enclosing_keycap = 0x20E3
    coverage.remove(combining_enclosing_keycap)

    source_font, main_target = argv[1], argv[2]
    subset.subset_font(source_font, main_target, include=coverage)

    # The third argument is only looked up once the main subset is written.
    emoji_target = argv[3]
    subset.subset_font(source_font, emoji_target, include=emoji_coverage)
def subset_symbols(srcdir, dstdir):
    """Subset Noto Sans Symbols in a curated way.

    Noto Sans Symbols is now subsetted in a curated way. Changes include:

    * Currency symbols now included in Roboto are removed.

    * All combining marks for symbols (except for combining keycap) are added,
      to combine with other symbols if needed.

    * Characters in symbol blocks that are also covered by Noto CJK fonts are
      added, for better harmony with the rest of the fonts in non-CJK
      settings. The dentistry characters at U+23BE..23CC are not added, since
      they appear to be Japan-only and full-width.

    * Characters that UTR #51 defines as default text are added, although they
      may also exist in the color emoji font, to make sure they get a default
      text style.

    * Characters that UTR #51 defines as default emoji are removed, to make
      sure they don't block the fallback to the color emoji font.

    * A few math symbols that are currently included in Roboto are added, to
      prepare for potentially removing them from Roboto when they are
      lower-quality in Roboto.

    Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts.

    Args:
      srcdir: directory searched for 'NotoSansSymbols-*.ttf' source fonts.
      dstdir: directory receiving one '-Subsetted.ttf' and one
        '-Subsetted2.ttf' file per source font.

    Raises:
      KeyError: if U+20E3 is not in the computed coverage when it is removed.
    """

    # TODO see if we need to change this subset based on Noto Serif coverage
    # (so the serif fallback chain would support them)

    target_coverage = set()
    # Add all characters in BLOCKS_TO_INCLUDE
    for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
        target_coverage.update(range(first, last + 1))

    # Add one-off characters
    target_coverage |= ONE_OFF_ADDITIONS
    # Remove characters preferably coming from Roboto
    target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
    # Remove default emoji presentation (including ones Android prefers
    # default)
    target_coverage -= EMOJI

    # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
    # mechanism to work properly
    target_coverage.remove(0x20E3)

    # Remove dentistry symbols, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    target_coverage -= set(range(0x23BE, 0x23CC + 1))

    # List the source fonts once, before anything is written. The main pass
    # writes 'NotoSansSymbols-*-Subsetted.ttf' files, which themselves match
    # the pattern; globbing again for the second pass would pick those
    # outputs up as inputs whenever srcdir and dstdir are the same directory.
    font_files = glob.glob(path.join(srcdir, 'NotoSansSymbols-*.ttf'))

    for font_file in font_files:
        # Single-argument print gives the same output on Python 2 and 3.
        print('main subset %s' % font_file)
        base_name = path.splitext(path.basename(font_file))[0]
        out_file = path.join(dstdir, base_name + '-Subsetted.ttf')
        subset.subset_font(font_file, out_file, include=target_coverage)

    # The second subset will be a fallback after the color emoji, for
    # explicit text presentation sequences.
    target_coverage = EMOJI | unicode_data.get_unicode_emoji_variants()

    for font_file in font_files:
        print('secondary subset %s' % font_file)
        base_name = path.splitext(path.basename(font_file))[0]
        out_file = path.join(dstdir, base_name + '-Subsetted2.ttf')
        subset.subset_font(font_file, out_file, include=target_coverage)
def subset_symbols(srcdir, dstdir):
    """Subset Noto Sans Symbols in a curated way.

    Noto Sans Symbols is now subsetted in a curated way. Changes include:

    * Currency symbols now included in Roboto are removed.

    * All combining marks for symbols (except for combining keycap) are added,
      to combine with other symbols if needed.

    * Characters in symbol blocks that are also covered by Noto CJK fonts are
      added, for better harmony with the rest of the fonts in non-CJK
      settings. The dentistry characters at U+23BE..23CC are not added, since
      they appear to be Japan-only and full-width.

    * Characters that UTR #51 defines as default text are added, although they
      may also exist in the color emoji font, to make sure they get a default
      text style.

    * Characters that UTR #51 defines as default emoji are removed, to make
      sure they don't block the fallback to the color emoji font.

    * A few math symbols that are currently included in Roboto are added, to
      prepare for potentially removing them from Roboto when they are
      lower-quality in Roboto.

    Based on subset_noto_sans_symbols.py from AOSP external/noto-fonts.

    Args:
      srcdir: directory searched for 'NotoSansSymbols-*.ttf' source fonts.
      dstdir: directory receiving one '-Subsetted.ttf' and one
        '-Subsetted2.ttf' file per source font.

    Raises:
      KeyError: if U+20E3 is not in the computed coverage when it is removed.
    """

    # TODO see if we need to change this subset based on Noto Serif coverage
    # (so the serif fallback chain would support them)

    target_coverage = set()
    # Add all characters in BLOCKS_TO_INCLUDE
    for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
        target_coverage.update(range(first, last + 1))

    # Add one-off characters
    target_coverage |= ONE_OFF_ADDITIONS
    # Remove characters preferably coming from Roboto
    target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
    # Remove default emoji presentation (including ones Android prefers
    # default)
    target_coverage -= EMOJI

    # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
    # mechanism to work properly
    target_coverage.remove(0x20E3)

    # Remove dentistry symbols, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    target_coverage -= set(range(0x23BE, 0x23CC + 1))

    # Take a single snapshot of the source fonts before any output exists.
    # Files written by the main pass ('...-Subsetted.ttf') also match the
    # glob pattern, so a second glob would treat them as sources if srcdir
    # and dstdir point at the same directory.
    source_fonts = glob.glob(path.join(srcdir, 'NotoSansSymbols-*.ttf'))

    for font_file in source_fonts:
        # Formatting into one string keeps the output identical on Python 2
        # and Python 3.
        print('main subset %s' % font_file)
        stem = path.splitext(path.basename(font_file))[0]
        out_file = path.join(dstdir, stem + '-Subsetted.ttf')
        subset.subset_font(font_file, out_file, include=target_coverage)

    # The second subset will be a fallback after the color emoji, for
    # explicit text presentation sequences.
    target_coverage = EMOJI | unicode_data.get_unicode_emoji_variants()

    for font_file in source_fonts:
        print('secondary subset %s' % font_file)
        stem = path.splitext(path.basename(font_file))[0]
        out_file = path.join(dstdir, stem + '-Subsetted2.ttf')
        subset.subset_font(font_file, out_file, include=target_coverage)
def _cjk_set():
    """Builds the set of characters that the CJK fonts will provide.

    The code points come from the ranges listed in noto_data.CJK_RANGES_TXT.
    """
    return _code_range_to_set(
        unicode_data._parse_code_ranges(noto_data.CJK_RANGES_TXT))
def _symbol_set():
    """Builds the set of characters that Noto Symbols should support.

    The code points come from the ranges listed in
    noto_data.SYMBOL_RANGES_TXT.
    """
    return _code_range_to_set(
        unicode_data._parse_code_ranges(noto_data.SYMBOL_RANGES_TXT))
def _math_set():
    """Builds the set of characters that Noto Math should support.

    The code points come from the ranges listed in noto_data.MATH_RANGES_TXT.
    """
    return _code_range_to_set(
        unicode_data._parse_code_ranges(noto_data.MATH_RANGES_TXT))