def _collect_script_to_punct(files):
  """Builds script to punct from provided cldr files.  Builds 'LGC'
  data from component scripts.  Adds ASCII single and double quotes if
  corresponding quotes are in the punct."""

  script_to_punct = collections.defaultdict(set)
  curly_quotes_to_standard = [
      (frozenset([unichr(0x2018), unichr(0x2019)]), frozenset(['\''])),
      (frozenset([unichr(0x201C), unichr(0x201D)]), frozenset(['"'])),
  ]
  for f in files:
    tree = ET.parse(f)
    punct = _collect_punct_data(tree)
    if punct:
      filename = path.splitext(path.basename(f))[0]
      script = cldr_data.get_likely_script(filename)
      if script == 'Zzzz':
        if filename != 'root':
          sys.stderr.write('no script for %s\n' % filename)
      else:
        script_to_punct[script] |= punct

  script_to_punct['LGC'] = set(
      script_to_punct['Latn'] |
      script_to_punct['Grek'] |
      script_to_punct['Cyrl'])

  for script in script_to_punct:
    punct = script_to_punct[script]
    for curly, standard in curly_quotes_to_standard:
      if curly & punct:
        punct.update(standard)

  return script_to_punct
Exemplo n.º 2
0
def _read_character_at(source, pointer):
    """Reads a code point or a backslash-u-escaped code point."""
    while pointer < len(source) and source[pointer] == ' ':
        pointer += 1

    if pointer >= len(source):
        raise IndexError('pointer %d out of range 0-%d' %
                         (pointer, len(source)))

    if source[pointer] == '\\':
        if source[pointer + 1].upper() == 'U':
            end_of_hex = pointer + 2
            while (end_of_hex < len(source)
                   and source[end_of_hex].upper() in '0123456789ABCDEF'):
                end_of_hex += 1
            if end_of_hex - (pointer + 2) not in {4, 5, 6, 8}:
                raise Exception(
                    'cldr_data: parse of unicode escape failed at %d: %s' %
                    (pointer, source[pointer:pointer + 10]))
            hex_code = source[pointer + 2:end_of_hex]
            return end_of_hex, unichr(int(hex_code, 16))
        else:
            return pointer + 2, source[pointer + 1]
    else:
        return pointer + 1, source[pointer]
Exemplo n.º 3
0
def exemplar_string_to_list(exstr):
    """Expands a bracketed exemplar string into a list of its members.

    The string must start with "["; a trailing "]" is dropped if present.
    Spaces separate members, "{...}" groups several characters into one
    multi-character member, and "a-z" expands to every code point from
    the previous member up to and including the character after the
    hyphen.

    Args:
        exstr: the exemplar string, e.g. "[a b {ch} x-z]".

    Returns:
        A list of strings, one per member.
    """
    assert exstr[0] == "["
    body = exstr[1:]
    if body[-1] == "]":
        body = body[:-1]

    members = []
    pos = 0
    while pos < len(body):
        current = body[pos]

        if current == " ":
            pos += 1
            continue

        if current == "{":
            pos += 1
            pieces = []
            while body[pos] != "}":
                pos, char = read_character_at(body, pos)
                pieces.append(char)
            members.append("".join(pieces))
            pos += 1  # step over the closing brace
            continue

        if current == "-":
            range_start = members[-1]
            assert len(range_start) == 1  # can't have ranges with strings
            pos, range_end = read_character_at(body, pos + 1)
            assert range_end not in [" ", "\\", "{", "}", "-"]
            # The range start is already in the list, so begin one past it.
            members.extend(
                unichr(code)
                for code in range(ord(range_start) + 1, ord(range_end) + 1))
            continue

        pos, char = read_character_at(body, pos)
        members.append(char)

    return members
Exemplo n.º 4
0
    def sub(esc_match):
        """Expands one matched escape sequence into the character it encodes.

        Args:
            esc_match: a regex match whose group(1) is the escape type
                ('x', 'u' or 'U') and whose group(2) is the text following
                it, expected to start with hex digits.

        Returns:
            The character encoded by the first 2 ('x'), 4 ('u') or 6 ('U')
            hex digits, followed by the rest of group(2) unchanged.

        Raises:
            ValueError: if the escape type is unknown, group(2) is shorter
                than the required digit count, the digits are not valid
                hex, or the value exceeds 0x10FFFF.
        """
        esc_type = esc_match.group(1)
        esc_val = esc_match.group(2)
        if esc_type == 'x':
            esc_len = 2
        elif esc_type == 'u':
            esc_len = 4
        elif esc_type == 'U':
            esc_len = 6
        else:
            raise ValueError('internal error')

        if len(esc_val) < esc_len:
            error = 'Unicode escape too short: "%s"' % (esc_match.group(0))
            raise ValueError(error)
        unival = int(esc_val[:esc_len], 16)
        if unival > 0x10ffff:
            error = 'Unicode escape value too large: "%X"' % unival
            raise ValueError(error)
        if unival < 0x10000:
            prefix = unichr(unival)
        else:
            # Supplementary code points are produced by decoding a
            # \UXXXXXXXX escape instead of calling unichr(), which rejects
            # them on narrow Python 2 builds.  Going through bytes keeps
            # this working on Python 3 as well, where the former
            # unicode(text, encoding=...) call is not available.
            escaped = '\\U%08X' % unival
            prefix = escaped.encode('ascii').decode('unicode_escape')
        return prefix + esc_val[esc_len:]
Exemplo n.º 5
0
def exemplar_string_to_list(exstr):
    """Converts an exemplar set string into a flat list of its entries.

    The input must begin with '['; a closing ']' is removed when present.
    Inside, spaces are separators, '{...}' forms a single multi-character
    entry, and a hyphen extends the previous single-character entry into
    a range that ends at the character following the hyphen (inclusive).

    Args:
        exstr: the exemplar string to expand.

    Returns:
        A list with one string per entry.
    """
    assert exstr[0] == '['
    inner = exstr[1:]
    if inner[-1] == ']':
        inner = inner[:-1]

    entries = []
    index = 0
    limit = len(inner)
    while index < limit:
        symbol = inner[index]
        if symbol == ' ':
            index += 1
        elif symbol == '{':
            cursor = index + 1
            cluster = ''
            while inner[cursor] != '}':
                cursor, char = read_character_at(inner, cursor)
                cluster = cluster + char
            entries.append(cluster)
            index = cursor + 1
        elif symbol == '-':
            low = entries[-1]
            assert len(low) == 1  # can't have ranges with strings
            low_code = ord(low)

            index, high = read_character_at(inner, index + 1)
            assert high not in [' ', '\\', '{', '}', '-']
            high_code = ord(high)
            # 'low' itself was appended earlier; add the remainder.
            entries += list(map(unichr, range(low_code + 1, high_code + 1)))
        else:
            index, char = read_character_at(inner, index)
            entries.append(char)

    return entries
Exemplo n.º 6
0
def _format_set(char_set, name, filename):
    lines = ['%s = {' % name]
    for cp in sorted(char_set):
        name = unicode_data.name(cp)
        lines.append('    0x%04X,  # %s %s' % (cp, unichr(cp), name))
    lines.append('}\n')
    with codecs.open(filename, 'w', 'UTF-8') as f:
        f.write('\n'.join(lines))
    print('wrote', filename)
Exemplo n.º 7
0
def _format_set(char_set, name, filename):
    lines = ["%s = {" % name]
    for cp in sorted(char_set):
        name = unicode_data.name(cp)
        lines.append("    0x%04X,  # %s %s" % (cp, unichr(cp), name))
    lines.append("}\n")
    with codecs.open(filename, "w", "UTF-8") as f:
        f.write("\n".join(lines))
    print("wrote", filename)
Exemplo n.º 8
0
 def test_individual_spacing_marks(self):
     """Checks that every mark under test has a non-zero advance alone."""
     for font in self.font_files:
         print('Testing %s for stand-alone spacing marks...' % font)
         for code in self.marks_to_test:
             advances = layout.get_advances(unichr(code), font)
             # A lone mark must lay out as exactly one glyph.
             assert len(advances) == 1
             self.assertNotEqual(advances[0], 0)
Exemplo n.º 9
0
def _character_string_html(codelist, used_font):
    """Builds a <bdo> element with the characters a font can display.

    Only code points that are both in codelist and in the font's code
    set, excluding the C0 controls (0x00-0x1F), are emitted.

    Args:
        codelist: object whose codes() yields the candidate code points.
        used_font: (key, info) pair; key becomes a CSS class and the third
            item of info[0] supplies the font's code set via codeset().

    Returns:
        The HTML string, or None if no code point qualifies.
    """
    control_codes = frozenset(range(0, 0x20))
    font_key, font_info = used_font
    _, _, font_codelist = font_info[0]
    displayable = frozenset(font_codelist.codeset() - control_codes)

    chars = [unichr(cp) for cp in codelist.codes() if cp in displayable]
    if not chars:
        return None

    opening = ['<bdo class="', font_key, ' line" dir="ltr">']
    return "".join(opening + chars + ["</bdo>"])
Exemplo n.º 10
0
def _segments_to_strings(segments, prefix, result):
    """Recursive utility function to expand segments into a list of strings."""
    if len(segments) == 0:
        result.append(prefix)
        return
    segment = segments[0]
    segments = segments[1:]
    if type(segment) == tuple:
        for cp in range(segment[0], segment[1] + 1):
            _segments_to_strings(segments, prefix + unichr(cp), result)
    else:
        _segments_to_strings(segments, prefix + segment, result)
Exemplo n.º 11
0
    def input_from_name(self, name, seen=None, pad=False):
        """Given a glyph name, return harfbuzz input that renders the glyph.

        The result is a (features, text) tuple: `features` lists the
        feature tags to activate and `text` is the input string. Among all
        candidate inputs found, the smallest tuple is chosen.

        `seen` holds the glyph names currently being resolved, so that
        recursive lookups through substitution features do not loop. With
        `pad` set, and only when the space width is positive, enough
        leading spaces are prepended to `text` to cover the glyph's width.

        None is returned when the glyph cannot be produced (it has neither
        a direct unicode mapping nor a usable substitution), or when
        `name` is already in `seen`, meaning this path is a cycle.

        Results are cached in `self.memo` by glyph name.
        NOTE(review): the cache key ignores `pad` and `seen`, so a cached
        entry is returned regardless of the current arguments -- confirm
        that this is intended.
        """

        if name in self.memo:
            return self.memo[name]

        # avoid following cyclic paths through features
        if seen is None:
            seen = set()
        elif name in seen:
            return None
        seen.add(name)

        candidates = []

        # a simple unicode mapping, if there is one
        if name in self.reverse_cmap:
            candidates.append(((), unichr(self.reverse_cmap[name])))

        # inputs reachable through the substitution features
        candidates.extend(self._inputs_from_gsub(name, seen))
        seen.remove(name)

        # Recursive lookups may have returned None to break a cycle; drop
        # those so that any remaining option can still be used.
        candidates = [found for found in candidates if found is not None]
        if not candidates:
            return None

        features, text = min(candidates)
        # can't pad if we don't support space
        if pad and self.space_width > 0:
            width, space = self.widths[name], self.space_width
            space_count = width // space + (1 if width % space else 0)
            text = ' ' * space_count + text
        self.memo[name] = features, text
        return self.memo[name]
def _regular_expression_from_set(character_set):
    """Returns a regexp matching any sequence of a set of input characters.
    """
    character_set -= set(range(0x00, 0x20))  # Remove ASCII controls

    literal_list = []
    for code in character_set:
        char = unichr(code)
        if char in ["\\", "[", "]", "^", "-"]:
            char = "\\" + char
        literal_list.append(char)
    regexp = "[" + "".join(literal_list) + "]+"
    return re.compile(regexp)
Exemplo n.º 13
0
def _regular_expression_from_set(character_set):
    """Returns a regexp matching any sequence of a set of input characters.
    """
    character_set -= set(range(0x00, 0x20))  # Remove ASCII controls

    literal_list = []
    for code in character_set:
        char = unichr(code)
        if char in ['\\', '[', ']', '^', '-']:
            char = '\\' + char
        literal_list.append(char)
    regexp = '[' + ''.join(literal_list) + ']+'
    return re.compile(regexp)
Exemplo n.º 14
0
def read_character_at(source, pointer):
    """Reads the character, or backslash escape, that starts at pointer.

    A backslash followed by "u" and 4, 5 or 6 hex digits yields the code
    point they spell; a backslash followed by any other character yields
    that character.  The character at pointer must not be a space,
    hyphen or brace.

    Args:
        source: the string being parsed.
        pointer: index of the character to read.

    Returns:
        A (next_pointer, character) tuple.
    """
    first = source[pointer]
    assert first not in " -{}"
    if first != "\\":
        return pointer + 1, first

    escaped = source[pointer + 1]
    if escaped != "u":
        return pointer + 2, escaped

    hex_start = pointer + 2
    hex_end = hex_start
    while (hex_end < len(source)
           and source[hex_end].upper() in "0123456789ABCDEF"):
        hex_end += 1
    assert hex_end - hex_start in {4, 5, 6}
    return hex_end, unichr(int(source[hex_start:hex_end], 16))
def main(argv):
    """Prints the characters named by the arguments, separated by spaces.

    Every argument after the program name is either a single character
    representation or two of them joined by a hyphen, which stands for
    the inclusive range between them.
    """
    codes = []
    for arg in argv[1:]:
        if '-' not in arg:
            codes.append(char_rep_to_code(arg))
            continue
        # Split at the first hyphen only.
        first_rep, _, last_rep = arg.partition('-')
        first_code = char_rep_to_code(first_rep)
        last_code = char_rep_to_code(last_rep)
        codes.extend(range(first_code, last_code + 1))

    output = u' '.join(unichr(code) for code in codes)
    if sys.version_info < (2, 7):
        output = output.encode('UTF-8')
    print(output)
Exemplo n.º 16
0
    def test_combinations(self):
        """Checks that soft-dotted letters change glyph when a mark follows."""

        # TODO: replace the following list with actual derivation based on
        # Unicode's soft-dotted property
        soft_dotted_letters = (
            u'ij\u012F\u0249\u0268\u029D\u02B2\u03F3\u0456'
            u'\u0458\u1D62\u1D96\u1DA4\u1DA8\u1E2D\u1ECB'
            u'\u2071\u2C7C')

        for font in self.font_files:
            print('Testing %s for soft-dotted combinations...' % font)

            for base_letter in soft_dotted_letters:
                print('Testing %s combinations' % base_letter.encode('UTF-8'))
                for code in self.marks_to_test:
                    mark = unichr(code)
                    letter_only = layout.get_glyphs(base_letter, font)
                    combination = layout.get_glyphs(base_letter + mark, font)
                    # The first glyph must differ from the glyph used for
                    # the letter on its own.
                    message = (
                        "The sequence <%04X, %04X> doesn't lose its dot, "
                        "but it should" % (ord(base_letter), ord(mark)))
                    self.assertNotEqual(
                        combination[0], letter_only[0], message)
def test_all_combinations(max_len,
                          font_file_name,
                          min_allowed,
                          max_allowed,
                          language=None):
    """Renders every string of up to max_len characters from the font.

    The alphabet is the font's character set minus the ASCII controls,
    in sorted order.  All strings of length 1 through max_len over that
    alphabet are joined with newlines and handed to test_rendering,
    whose result is returned.
    """

    codes = coverage.character_set(font_file_name)
    codes -= set(range(0x00, 0x20))  # Remove ASCII controls
    alphabet = sorted(unichr(code) for code in codes)

    all_strings = [
        "".join(combination)
        for length in range(1, max_len + 1)
        for combination in itertools.product(alphabet, repeat=length)
    ]

    return test_rendering("\n".join(all_strings), font_file_name,
                          min_allowed, max_allowed, language)
Exemplo n.º 18
0
 def test_spacing_marks_in_combination(self):
     """Checks that spacing marks stay separate glyphs after a letter."""
     base_letters = (
         u'A\u00C6BCDEFGHIJKLMNO\u00D8\u01A0PRST'
         u'U\u01AFVWXYZ'
         u'a\u00E6bcdefghi\u0131j\u0237klmn'
         u'o\u00F8\u01A1prs\u017Ftu\u01B0vwxyz'
         u'\u03D2')
     for font in self.font_files:
         print('Testing %s for spacing marks in combination...' % font)
         for base_letter in base_letters:
             print('Testing %s combinations' % base_letter)
             for code in self.marks_to_test:
                 # Skip rhotic hook, as it's perhaps OK for it to form
                 # ligatures
                 if code == 0x02DE:
                     continue
                 mark = unichr(code)
                 advances = layout.get_advances(base_letter + mark, font)
                 message = (
                     'The sequence <%04X, %04X> combines, '
                     'but it should not' % (ord(base_letter), ord(mark)))
                 self.assertEqual(len(advances), 2, message)
Exemplo n.º 19
0
def _write_char_text(chars, filepath, chars_per_line, sep):
    def accept_cp(cp):
        cat = unicode_data.category(cp)
        return cat[0] not in ["M", "C", "Z"] or cat == "Co"

    text = [unichr(cp) for cp in chars if accept_cp(cp)]
    filename, _ = path.splitext(path.basename(filepath))
    m = re.match(r"(.*)-(?:Regular|Bold|Italic|BoldItalic)", filename)
    if m:
        filename = m.group(1)
    filename += "_chars.txt"
    print("writing file: %s" % filename)
    print("%d characters (of %d)" % (len(text), len(chars)))
    if chars_per_line > 0:
        lines = []
        for n in range(0, len(text), chars_per_line):
            substr = text[n:n + chars_per_line]
            lines.append(sep.join(cp for cp in substr))
        text = "\n".join(lines)
    with codecs.open(filename, "w", "utf-8") as f:
        f.write(text)
Exemplo n.º 20
0
    def run_sub_coverage_test(self, feature, reqs_path):
        """Checks that a substitution feature yields the required glyphs.

        Every line of the requirements file holds a hex code point and an
        expected glyph name, followed by ' #' and a comment.  For each
        font, each required character is shaped with the feature enabled
        and the name of the first resulting glyph is compared with the
        expected one; the test fails if any character differs.
        """

        reqs_list = []
        with open(reqs_path) as reqs_file:
            for line in reqs_file:
                # Only the part before the ' #' comment carries data.
                fields = line[:line.index(' #')]
                input_cp, output_name = fields.split()
                reqs_list.append((unichr(int(input_cp, 16)), output_name))

        for fontfile, font in zip(self.fontfiles, self.fonts):
            glyph_order = font.getGlyphOrder()
            chars_with_no_sub = []
            for char, expected_name in reqs_list:
                glyphs = layout.get_glyphs(char, fontfile,
                                           '--features=%s' % feature)
                actual_name = glyph_order[glyphs[0]]
                if actual_name != expected_name:
                    chars_with_no_sub.append(char)
            failure_message = (
                "%s feature is not applied correctly to '%s'" %
                (feature, u''.join(chars_with_no_sub).encode('UTF-8')))
            self.assertEqual(chars_with_no_sub, [], failure_message)
Exemplo n.º 21
0
def _write_char_text(chars, filepath, chars_per_line, sep):
    def accept_cp(cp):
        cat = unicode_data.category(cp)
        return cat[0] not in ['M', 'C', 'Z'] or cat == 'Co'

    text = [unichr(cp) for cp in chars if accept_cp(cp)]
    filename, _ = path.splitext(path.basename(filepath))
    m = re.match(r'(.*)-(?:Regular|Bold|Italic|BoldItalic)', filename)
    if m:
        filename = m.group(1)
    filename += '_chars.txt'
    print('writing file: %s' % filename)
    print('%d characters (of %d)' % (len(text), len(chars)))
    if chars_per_line > 0:
        lines = []
        for n in range(0, len(text), chars_per_line):
            substr = text[n:n + chars_per_line]
            lines.append(sep.join(cp for cp in substr))
        text = '\n'.join(lines)
    with codecs.open(filename, 'w', 'utf-8') as f:
        f.write(text)
Exemplo n.º 22
0
    def sub(esc_match):
        """Replaces one matched escape sequence by the character it encodes.

        group(1) of the match is the escape type and group(2) the text
        after it.  The first 2 ("x"), 4 ("u") or 6 ("U") characters of
        group(2) are read as a hex code point; whatever follows them is
        passed through unchanged.

        Raises:
            ValueError: for an unknown escape type, too few characters, or
                a value above 0x10FFFF.
        """
        digits_by_type = {"x": 2, "u": 4, "U": 6}

        esc_type = esc_match.group(1)
        esc_val = esc_match.group(2)
        if esc_type not in digits_by_type:
            raise ValueError("internal error")
        esc_len = digits_by_type[esc_type]

        if len(esc_val) < esc_len:
            raise ValueError(
                'Unicode escape too short: "%s"' % (esc_match.group(0)))

        hex_digits, remainder = esc_val[:esc_len], esc_val[esc_len:]
        unival = int(hex_digits, 16)
        if unival > 0x10FFFF:
            raise ValueError(
                'Unicode escape value too large: "%X"' % unival)
        return unichr(unival) + remainder
Exemplo n.º 23
0
def _generate_excluded_characters():
    # Some of these exclusions are desired, and some are reluctantly applied because
    # Noto currently does not support some characters.  We use the generated
    # data as fallback samples on a per-script and not per-font basis, which is also
    # a problem.

    # Religious characters
    # deva OM, Arabic pbuh, bismillah
    codepoints = [0x950, 0xFDFA, 0xFDFD]

    # Cyrillic characters not in sans or serif
    codepoints.append(0x2E2F)
    for cp in range(0xA640, 0xA680):
        codepoints.append(cp)

    # Arabic character not in kufi
    codepoints.append(0x08A0)

    chars = set()
    for cp in codepoints:
        chars.add(unichr(cp))
    return frozenset(chars)
Exemplo n.º 24
0
def unicode_set_string_to_list(us_str):
    """Expands a unicode-set style string into a list of its members.

    Surrounding brackets are optional, but an opening "[" requires a
    closing "]".  Spaces separate members, "{...}" forms one
    multi-character member, and a hyphen extends the previous
    single-character member into an inclusive range ending at the next
    character.  A hyphen with nothing but spaces after it is a literal
    hyphen.

    Args:
        us_str: the set string, e.g. "[a-c {ch} -]".

    Returns:
        A list of strings, one per member.
    """
    if us_str[0] == "[":
        assert us_str[-1] == "]"
        us_str = us_str[1:-1]

    members = []
    pos = 0
    length = len(us_str)
    while pos < length:
        current = us_str[pos]
        if current == " ":
            pos += 1
        elif current == "{":
            pos += 1
            pieces = []
            while us_str[pos] != "}":
                pos, char = _read_character_at(us_str, pos)
                pieces.append(char)
            members.append("".join(pieces))
            pos += 1  # step over the closing brace
        elif current == "-":
            # Move to the last space in the run following the hyphen.
            while pos + 1 < length and us_str[pos + 1] == " ":
                pos += 1
            if pos + 1 == length:  # hyphen before ']' is special
                members.append("-")
                break
            range_start = members[-1]
            assert len(range_start) == 1  # can't have ranges with strings

            pos, range_end = _read_character_at(us_str, pos + 1)
            assert range_end not in [" ", "\\", "{", "}", "-"]
            members.extend(
                unichr(code)
                for code in range(ord(range_start) + 1, ord(range_end) + 1))
        else:
            pos, char = _read_character_at(us_str, pos)
            members.append(char)

    return members
Exemplo n.º 25
0
def unicode_set_string_to_list(us_str):
    """Converts a unicode-set style string into a flat list of entries.

    If the string opens with '[' it must close with ']', and both are
    removed.  Inside, spaces are separators, '{...}' builds a single
    multi-character entry, and a hyphen turns the previous
    single-character entry into a range that ends (inclusively) at the
    next character.  A hyphen followed only by spaces is kept as a
    literal '-'.

    Args:
        us_str: the set string to expand.

    Returns:
        A list with one string per entry.
    """
    if us_str[0] == '[':
        assert us_str[-1] == ']'
        us_str = us_str[1:-1]

    entries = []
    index = 0
    end = len(us_str)
    while index < end:
        symbol = us_str[index]
        if symbol == ' ':
            index += 1
        elif symbol == '{':
            cursor = index + 1
            cluster = ''
            while us_str[cursor] != '}':
                cursor, char = _read_character_at(us_str, cursor)
                cluster = cluster + char
            entries.append(cluster)
            index = cursor + 1
        elif symbol == '-':
            # Skip ahead to the last space directly after the hyphen.
            while index + 1 < end and us_str[index + 1] == ' ':
                index += 1
            if index + 1 == end:  # hyphen before ']' is special
                entries.append('-')
                break
            low = entries[-1]
            assert len(low) == 1  # can't have ranges with strings
            low_code = ord(low)

            index, high = _read_character_at(us_str, index + 1)
            assert high not in [' ', '\\', '{', '}', '-']
            high_code = ord(high)
            entries += list(map(unichr, range(low_code + 1, high_code + 1)))
        else:
            index, char = _read_character_at(us_str, index)
            entries.append(char)

    return entries
Exemplo n.º 26
0
def render_codes(
    file_name, code_list, font_name, weight_name, style_name,
    stretch_name, font_size, lang, ext,
):
    """Renders the text spelled by a list of hex code point strings.

    Each item of code_list is parsed as a hexadecimal code point; the
    resulting characters are concatenated and passed, with all other
    arguments unchanged, to render_text.
    """
    characters = [unichr(int(hex_code, 16)) for hex_code in code_list]
    render_text(
        file_name, u"".join(characters), font_name, weight_name,
        style_name, stretch_name, font_size, lang, ext,
    )
Exemplo n.º 27
0
                elif stage == STAGE_GLYPHS:
                    cr.set_source_rgb(*font.color.rgb)
                    # cr.set_source_rgb(0,0,0)
                    cr.set_font_face(font.get_cairo_font_face())
                    ascent, descent, font_height, max_x_adv, max_y_adv = cr.font_extents(
                    )

                    cr.save()
                    # XXX cr.set_font_size (FONT_SIZE*FONT_SIZE / (ascent+descent))
                    cr.set_font_size(
                        round(1.2 * FONT_SIZE * FONT_SIZE /
                              (ascent + descent)))

                    ascent, descent, font_height, max_x_adv, max_y_adv = cr.font_extents(
                    )
                    utf8 = unichr(char).encode('utf-8')
                    x1, y1, width, height, xadv, yadv = cr.text_extents(utf8)
                    cr.move_to(FONT_SIZE * .5 - (x1 + .5 * width),
                               FONT_SIZE * .5 - (-ascent + descent) * .5)
                    cr.show_text(utf8)

                    cr.restore()
                break
            cr.translate(FONT_SIZE, 0)
            cr.translate(PADDING, 0)
        cr.set_source_rgb(0, 0, 0)
        cr.move_to(MARGIN, FONT_SIZE)
        if stage == 0:
            cr.set_font_face(noto_sans_lgc)
            cr.show_text("U+%04X" % (row_start + NUM_COLS - 1))
        cr.translate(LABEL_WIDTH + 2 * MARGIN, 0)
Exemplo n.º 28
0
def _build_text(name_map, initial_text=''):
    """Interactively builds a string from names typed at a prompt.

    Each input line is either a command or a search string:

      quit   -- stop and return the text built so far
      help   -- call _help()
      names  -- list all names in name_map, sorted
      dump   -- print the text and the code point and name of each char
      clear  -- reset the text to empty
      write  -- ask for a file name and pass it with the text to
                _write_text()

    Any other line is matched as a substring against the names in
    name_map.  A single match is used directly; otherwise an exact name
    match, then a unique whole-word match, is preferred; failing that
    the user picks from a numbered list.  The value mapped to the chosen
    name is converted with unichr() and appended to the text.

    Args:
        name_map: dict mapping names to integer code points.
        initial_text: text to start from.

    Returns:
        The text as it stands when 'quit' is entered.
    """
    text = initial_text
    print('build text using map of length %d' % len(name_map))
    while True:
        line = input('> ')
        # Empty input just prompts again.
        if not line:
            continue
        if line == 'quit':
            break
        if line == 'help':
            _help()
            continue
        if line == 'names':
            print('names:\n  ' + '\n  '.join(sorted(name_map.keys())))
            continue
        if line == 'dump':
            print('dump: \'%s\'' % text)
            for cp in text:
                print('%06x %s' % (ord(cp), unicode_data.name(ord(cp))))
            continue
        if line == 'clear':
            text = ''
            continue
        if line == 'write':
            line = input('file name> ')
            # An empty file name cancels the write.
            if line:
                _write_text(line, text)
            continue

        # Not a command: collect every name containing the typed text.
        matches = []
        for name, cp in sorted(name_map.items()):
            if line in name:
                matches.append(name)
        if not matches:
            print('no match for "%s"' % line)
            continue

        if len(matches) == 1:
            print(matches[0])
            text += unichr(name_map[matches[0]])
            continue

        # if we match a full line, then use that
        if line in matches:
            print(line)
            text += unichr(name_map[line])
            continue

        # Narrow down to names containing the input as a whole
        # space-separated word.
        new_matches = []
        for m in matches:
            if line in m.split(' '):
                new_matches.append(m)

        # if we match a full word, and only one line has this full word, use that
        if len(new_matches) == 1:
            print(new_matches[0])
            text += unichr(name_map[new_matches[0]])
            continue

        # Still ambiguous: show the numbered list of all substring matches
        # until the user picks a valid index or enters 'q'.
        select_multiple = True
        while select_multiple:
            print('multiple matches:\n  ' +
                  '\n  '.join('[%2d] %s' % (i, n)
                              for i, n in enumerate(matches)))
            while True:
                line = input('0-%d or q to skip> ' % (len(matches) - 1))
                if line == 'q':
                    select_multiple = False
                    break
                try:
                    n = int(line)
                    break
                except ValueError:
                    # Not a number: ask again without reprinting the list.
                    continue

            if not select_multiple:  # q
                break

            if n < 0 or n >= len(matches):
                print('%d out of range' % n)
                # Reprints the list and asks again.
                continue

            text += unichr(name_map[matches[n]])
            select_multiple = False

    print('done.')
    return text
Exemplo n.º 29
0
    def find_rendered_diffs(self, font_size=128, render_path=None):
        """Find diffs of glyphs as rendered by harfbuzz.

        For every name in self.names, input that produces the glyph is
        generated for both fonts, each font is rendered with the external
        `hb-view` program, and the two images are compared pixel by pixel
        after centering them on a common canvas.

        Glyphs that cannot be compared are recorded in self.stats under
        "gdef_mark_mismatch", "zero_width_mismatch", "input_mismatch" or
        "untested" and skipped. Glyphs whose diff exceeds
        self.diff_threshold are appended to self.stats["compared"] as
        (diff, name, self.basepath) tuples.

        Args:
            font_size: value passed to hb-view as --font-size.
            render_path: if given, directory under which a comparison image
                is saved for each glyph whose diff exceeds the threshold.
                A subdirectory named after self.basepath without its
                extension is created if needed.

        Returns:
            None; results are reported through self.stats.
        """

        hb_input_generator_a = hb_input.HbInputGenerator(self.font_a)
        hb_input_generator_b = hb_input.HbInputGenerator(self.font_b)

        if render_path:
            font_name, _ = os.path.splitext(self.basepath)
            render_path = os.path.join(render_path, font_name)
            if not os.path.exists(render_path):
                os.makedirs(render_path)

        self.build_names()
        diffs = []
        for name in self.names:
            # A glyph classed as a mark in only one of the fonts is
            # recorded and not rendered.
            class_a = self.gdef_a.get(name, GDEF_UNDEF)
            class_b = self.gdef_b.get(name, GDEF_UNDEF)
            if GDEF_MARK in (class_a, class_b) and class_a != class_b:
                self.stats["gdef_mark_mismatch"].append(
                    (self.basepath, name, GDEF_LABELS[class_a], GDEF_LABELS[class_b])
                )
                continue

            # Likewise for a glyph that has zero width in only one font.
            width_a = self.glyph_set_a[name].width
            width_b = self.glyph_set_b[name].width
            zwidth_a = width_a == 0
            zwidth_b = width_b == 0
            if zwidth_a != zwidth_b:
                self.stats["zero_width_mismatch"].append(
                    (self.basepath, name, width_a, width_b)
                )
                continue

            # Zero-width glyphs are requested with padding.
            hb_args_a = hb_input_generator_a.input_from_name(name, pad=zwidth_a)
            hb_args_b = hb_input_generator_b.input_from_name(name, pad=zwidth_b)
            if hb_args_a != hb_args_b:
                self.stats["input_mismatch"].append(
                    (self.basepath, name, hb_args_a, hb_args_b)
                )
                continue

            # ignore unreachable characters
            if not hb_args_a:
                self.stats["untested"].append((self.basepath, name))
                continue

            features, text = hb_args_a

            # ignore null character
            if unichr(0) in text:
                continue

            # Render the same input with each font; hb-view's output is
            # captured in memory and opened as an image below.
            img_file_a = BytesIO(
                subprocess.check_output(
                    [
                        "hb-view",
                        "--font-size=%d" % font_size,
                        "--features=%s" % ",".join(features),
                        self.path_a,
                        text,
                    ]
                )
            )
            img_file_b = BytesIO(
                subprocess.check_output(
                    [
                        "hb-view",
                        "--font-size=%d" % font_size,
                        "--features=%s" % ",".join(features),
                        self.path_b,
                        text,
                    ]
                )
            )
            img_a = Image.open(img_file_a)
            img_b = Image.open(img_file_b)
            width_a, height_a = img_a.size
            width_b, height_b = img_b.size
            data_a = img_a.getdata()
            data_b = img_b.getdata()
            img_file_a.close()
            img_file_b.close()

            # Center both images on a canvas large enough for either.
            width, height = max(width_a, width_b), max(height_a, height_b)
            offset_ax = (width - width_a) // 2
            offset_ay = (height - height_a) // 2
            offset_bx = (width - width_b) // 2
            offset_by = (height - height_b) // 2

            diff = 0
            for y in range(height):
                for x in range(width):
                    ax, ay = x - offset_ax, y - offset_ay
                    bx, by = x - offset_bx, y - offset_by
                    # A canvas pixel lying outside either image counts as
                    # a full difference.
                    if (
                        ax < 0
                        or bx < 0
                        or ax >= width_a
                        or bx >= width_b
                        or ay < 0
                        or by < 0
                        or ay >= height_a
                        or by >= height_b
                    ):
                        diff += 1
                    else:
                        # NOTE(review): subtracting pixels directly assumes
                        # single-channel image data with values up to 255
                        # -- confirm against hb-view's output format.
                        diff += (
                            abs(data_a[ax + ay * width_a] - data_b[bx + by * width_b])
                            / 255
                        )

            # Optionally express the diff as a fraction of the canvas area.
            if self.ratio_diffs:
                diff /= width * height

            if render_path and diff > self.diff_threshold:
                img_cmp = Image.new("RGB", (width, height))
                data_cmp = list(img_cmp.getdata())
                self._project(data_a, width_a, height_a, data_cmp, width, height, 1)
                self._project(data_b, width_b, height_b, data_cmp, width, height, 0)
                for y in range(height):
                    for x in range(width):
                        i = x + y * width
                        r, g, b = data_cmp[i]
                        assert b == 0
                        # The blue channel is set to the smaller of red
                        # and green.
                        data_cmp[i] = r, g, min(r, g)
                img_cmp.putdata(data_cmp)
                img_cmp.save(self._rendered_png(render_path, name))

            diffs.append((name, diff))

        # Keep only the glyphs whose diff exceeds the threshold.
        mismatched = {}
        for name, diff in diffs:
            if diff > self.diff_threshold:
                mismatched[name] = diff

        stats = self.stats["compared"]
        for name, diff in mismatched.items():
            stats.append((diff, name, self.basepath))
Exemplo n.º 30
0
                    ) = cr.font_extents()

                    cr.save()
                    # XXX cr.set_font_size (FONT_SIZE*FONT_SIZE / (ascent+descent))
                    cr.set_font_size(
                        round(1.2 * FONT_SIZE * FONT_SIZE / (ascent + descent))
                    )

                    (
                        ascent,
                        descent,
                        font_height,
                        max_x_adv,
                        max_y_adv,
                    ) = cr.font_extents()
                    utf8 = unichr(char).encode("utf-8")
                    x1, y1, width, height, xadv, yadv = cr.text_extents(utf8)
                    cr.move_to(
                        FONT_SIZE * 0.5 - (x1 + 0.5 * width),
                        FONT_SIZE * 0.5 - (-ascent + descent) * 0.5,
                    )
                    cr.show_text(utf8)

                    cr.restore()
                break
            cr.translate(FONT_SIZE, 0)
            cr.translate(PADDING, 0)
        cr.set_source_rgb(0, 0, 0)
        cr.move_to(MARGIN, FONT_SIZE)
        if stage == 0:
            cr.set_font_face(noto_sans_lgc)