Пример #1
0
def main(argv):
    """Run one of three modes selected by flags over METADATA.pb files.

    With --report an insights report is written, with --sample_text_audit
    the sample text is audited, and otherwise language support metadata is
    added to every METADATA.pb given on the command line.

    Args:
        argv: Command-line arguments; argv[1:] are METADATA.pb paths.
    """
    lang_dir = FLAGS.lang
    languages = _LoadLanguages(os.path.join(lang_dir, 'languages'))
    scripts = _LoadScripts(os.path.join(lang_dir, 'scripts'))
    # NOTE(review): `regions` is not read by any mode below; the load call
    # is kept as-is.
    regions = _LoadRegions(os.path.join(lang_dir, 'regions'))

    metadata_paths = argv[1:]

    if FLAGS.report:
        assert len(argv) > 1, 'No METADATA.pb files specified'
        assert FLAGS.out is not None, 'No output dir specified (--out)'
        print('Writing insights report...')
        _WriteReport(metadata_paths, FLAGS.out, languages)
        return

    if FLAGS.sample_text_audit:
        assert FLAGS.out is not None, 'No output dir specified (--out)'
        print('Auditing sample text')
        # Every script reachable through a language that some family lists.
        seen_scripts = set()
        for metadata_path in metadata_paths:
            family = _ReadProto(fonts_public_pb2.FamilyProto(), metadata_path)
            seen_scripts.update(
                languages[code].script for code in family.languages)
        unused_scripts = {s for s in scripts if s not in seen_scripts}
        _SampleTextAudit(FLAGS.out, languages, scripts, unused_scripts)
        return

    assert len(argv) > 1, 'No METADATA.pb files specified'
    # Maps the serialized `languages: "<id>"` line to the language name.
    line_to_lang_name = {
        'languages: "{code}"'.format(code=languages[key].id):
            languages[key].name
        for key in languages
    }
    for metadata_path in metadata_paths:
        _AddLanguageSupportMetadata(metadata_path, languages, scripts,
                                    line_to_lang_name)
Пример #2
0
def Metadata(file_or_dir):
  """Loads a family's METADATA.pb into a fonts_pb2.FamilyProto message.

  The argument may be the METADATA.pb file itself or the directory that
  holds it.

  Args:
    file_or_dir: Path to a file named METADATA.pb, or to a directory
      containing one.
  Returns:
    The FamilyProto message merged from the file's text-format content.
  Raises:
    ValueError: if file_or_dir is neither a METADATA.pb file nor a directory
      containing one.
  """
  if os.path.isdir(file_or_dir):
    metadata_file = os.path.join(file_or_dir, 'METADATA.pb')
    if not os.path.isfile(metadata_file):
      raise ValueError('No METADATA.pb in %s' % file_or_dir)
  else:
    # Anything that is not a directory must be an existing file with the
    # exact name METADATA.pb.
    named_metadata = os.path.basename(file_or_dir) == 'METADATA.pb'
    if not (os.path.isfile(file_or_dir) and named_metadata):
      raise ValueError(
          '%s is neither METADATA.pb file or a directory' % file_or_dir)
    metadata_file = file_or_dir

  msg = fonts_pb2.FamilyProto()
  with codecs.open(metadata_file, encoding='utf-8') as handle:
    content = handle.read()
  text_format.Merge(content, msg)
  return msg
Пример #3
0
def _AddGlyphMetadata(metadata_path):
    """Replace the sample_glyphs map of a METADATA.pb using its exemplar font.

    The family is read from metadata_path, its sample_glyphs map is emptied
    and refilled from _GetSampleGlyphs, and the message is handed back to
    _WriteProto with the same path.

    Args:
        metadata_path: Path to the METADATA.pb file; font files are looked
            up in the same directory.
    """
    family_proto = _ReadProto(fonts_public_pb2.FamilyProto(), metadata_path)
    exemplar = _GetExemplarFont(family_proto)
    family_dir = os.path.dirname(metadata_path)
    exemplar_path = os.path.join(family_dir, exemplar.filename)

    # Start from an empty map so previously recorded sections do not linger.
    glyph_map = family_proto.sample_glyphs
    glyph_map.clear()
    for section_name, section_glyphs in _GetSampleGlyphs(exemplar_path):
        glyph_map[section_name] = section_glyphs

    _WriteProto(family_proto, metadata_path)
Пример #4
0
def _AddLanguageSupportMetadata(metadata_path, languages, scripts,
                                line_to_lang_name):
    """Fill in the languages list of a METADATA.pb that has none yet.

    Args:
        metadata_path: Path to the METADATA.pb file; the exemplar font is
            looked up in the same directory.
        languages: Language collection forwarded to _SupportedLanguages.
        scripts: Accepted for signature compatibility; not read here.
        line_to_lang_name: Passed to _WriteProto as its `comments` argument.
    """
    family_proto = _ReadProto(fonts_public_pb2.FamilyProto(), metadata_path)
    if family_proto.languages:
        # Languages are already recorded; leave the file alone.
        return

    exemplar = _GetExemplarFont(family_proto)
    exemplar_path = os.path.join(os.path.dirname(metadata_path),
                                 exemplar.filename)
    language_ids = sorted(
        lang.id for lang in _SupportedLanguages(exemplar_path, languages))
    family_proto.languages.extend(language_ids)
    _WriteProto(family_proto, metadata_path, comments=line_to_lang_name)
Пример #5
0
def main(argv):
    """Run one of three modes selected by flags over METADATA.pb files.

    With --report an insights report is written, with --sample_text_audit
    the sample text is audited, and otherwise the supported languages of
    each family's exemplar font are written into METADATA.pb files that do
    not list any languages yet.

    Args:
        argv: Command-line arguments; argv[1:] are METADATA.pb paths.
    """
    languages = fonts.LoadLanguages(os.path.join(FLAGS.lang, 'languages'))
    scripts = fonts.LoadScripts(os.path.join(FLAGS.lang, 'scripts'))
    metadata_paths = argv[1:]

    if FLAGS.report:
        assert len(argv) > 1, 'No METADATA.pb files specified'
        assert FLAGS.out is not None, 'No output dir specified (--out)'
        print('Writing insights report...')
        _WriteReport(metadata_paths, FLAGS.out, languages)
        return

    if FLAGS.sample_text_audit:
        assert FLAGS.out is not None, 'No output dir specified (--out)'
        print('Auditing sample text')
        # Every script reachable through a language that some family lists.
        seen_scripts = set()
        for metadata_path in metadata_paths:
            family = fonts.ReadProto(fonts_public_pb2.FamilyProto(),
                                     metadata_path)
            seen_scripts.update(
                languages[code].script for code in family.languages)
        unused_scripts = {s for s in scripts if s not in seen_scripts}
        _SampleTextAudit(FLAGS.out, languages, scripts, unused_scripts)
        return

    assert len(argv) > 1, 'No METADATA.pb files specified'
    language_comments = fonts.LanguageComments(languages)
    for metadata_path in metadata_paths:
        family_metadata = fonts.ReadProto(fonts_public_pb2.FamilyProto(),
                                          metadata_path)
        if family_metadata.languages:
            # Families that already list languages are left untouched.
            continue
        family_dir = os.path.dirname(metadata_path)
        exemplar = fonts.GetExemplarFont(family_metadata)
        exemplar_font = TTFont(os.path.join(family_dir, exemplar.filename))
        supported = fonts.SupportedLanguages(exemplar_font, languages)
        family_metadata.languages.extend(
            sorted(lang.id for lang in supported))
        fonts.WriteProto(family_metadata, metadata_path,
                         comments=language_comments)
Пример #6
0
def families_from_file(fp):
    """Return the family names for the directories listed in a text file.

    Used for to_sandbox.txt and to_production.txt files: the file holds
    whitespace-separated family directories, each resolved relative to the
    directory that contains ``fp``. Every directory must hold a METADATA.pb.

    Args:
        fp: Path to the text file listing family directories.
    Returns:
        A list with the ``name`` of each family's METADATA.pb, in the order
        the directories appear in the file.
    Raises:
        FileNotFoundError: if one or more listed directories have no
            METADATA.pb; the message lists every missing file.
    """
    with open(fp) as doc:
        family_dirs = doc.read().split()

    # Resolve the base directory once instead of once per entry.
    base_dir = Path(fp).parent
    metadata_files = [base_dir / d / 'METADATA.pb' for d in family_dirs]

    # Check everything up front so the error reports all missing files.
    missing_files = [str(f) for f in metadata_files if not f.is_file()]
    if missing_files:
        raise FileNotFoundError(
            "Following METADATA.pbs files are missing:\n{}".format(
                "\n".join(missing_files)
            )
        )
    return [read_proto(f, fonts_pb2.FamilyProto()).name for f in metadata_files]
Пример #7
0
def main(argv):
    """Set is_noto on every family whose name matches NOTO_FAMILY_NAME.

    In preview mode (--preview) matching family names are only printed and
    no file is written.

    Args:
        argv: Command-line arguments; argv[1:] are METADATA.pb paths.
    """
    assert len(argv) > 1, 'No METADATA.pb files specified'

    preview_only = FLAGS.preview
    if preview_only:
        print('Running in preview mode. No changes will be made.')
        print('The names of families detected as part of the Noto')
        print('collection will be printed below.')

    for metadata_path in argv[1:]:
        family = _ReadProto(fonts_public_pb2.FamilyProto(), metadata_path)
        if not NOTO_FAMILY_NAME.search(family.name):
            continue
        if preview_only:
            print(family.name)
            continue
        family.is_noto = True
        _WriteProto(family, metadata_path)
Пример #8
0
def _WriteReport(metadata_paths, out_dir, languages):
    """Write support.csv listing supported languages that lack sample text.

    Each language referenced by at least one family and having no
    `sample_text` field contributes one row, after a fixed header row.

    Args:
        metadata_paths: Paths of the METADATA.pb files to inspect.
        out_dir: Directory in which support.csv is written.
        languages: Mapping from language code to language message.
    """
    header = [
        'id', 'name', 'lang', 'script', 'population', 'ec_base',
        'ec_auxiliary', 'ec_marks', 'ec_numerals', 'ec_punctuation',
        'ec_index', 'st_fallback', 'st_fallback_name', 'st_masthead_full',
        'st_masthead_partial', 'st_styles', 'st_tester', 'st_poster_sm',
        'st_poster_md', 'st_poster_lg', 'st_specimen_48', 'st_specimen_36',
        'st_specimen_32', 'st_specimen_21', 'st_specimen_16'
    ]
    rows = [header]

    # NOTE(review): these two lists are filled in below but never read or
    # written out by this function.
    without_lang = []
    without_sample_text = []
    # Language code -> language message, in first-seen order.
    supported_without_sample_text = {}

    for metadata_path in metadata_paths:
        family = fonts.ReadProto(fonts_public_pb2.FamilyProto(), metadata_path)
        family_langs = family.languages

        if len(family_langs) == 0:
            without_lang.append(family.name)
        elif not any(languages[code].HasField('sample_text')
                     for code in family_langs):
            without_sample_text.append(family.name)

        for code in family_langs:
            if languages[code].HasField('sample_text'):
                continue
            if code not in supported_without_sample_text:
                supported_without_sample_text[code] = languages[code]

    # Only the first five header columns are populated for each language.
    rows.extend(
        [lang.id, lang.name, lang.language, lang.script, lang.population]
        for lang in supported_without_sample_text.values())

    _WriteCsv(os.path.join(out_dir, 'support.csv'), rows)
Пример #9
0
def _MakeMetadata(fontdir, is_new):
    """Builds the METADATA.pb message for the font files in a directory.

    Args:
        fontdir: Directory containing font files for which we want metadata.
        is_new: True for a new family. When False, designer, category and
            date_added are copied from the METADATA.pb already in fontdir.
    Returns:
        A fonts_pb2.FamilyProto message holding the family metadata.
    Raises:
        RuntimeError: If the variable font axes info differs between font
            files of the same family.
    """
    file_family_style_weights = _FileFamilyStyleWeights(fontdir)
    first_entry = file_family_style_weights[0]

    # Subsets are detected from the first font file only; 'menu' is always
    # included.
    detected_subsets = fonts.SubsetsInFont(first_entry.file, FLAGS.min_pct,
                                           FLAGS.min_pct_ext)
    subset_names = ['menu']
    subset_names.extend(entry[0] for entry in detected_subsets)
    font_license = fonts.LicenseFromPath(fontdir)

    metadata = fonts_pb2.FamilyProto()
    metadata.name = first_entry.family

    if is_new:
        metadata.designer = 'UNKNOWN'
        metadata.category = 'SANS_SERIF'
        metadata.date_added = time.strftime('%Y-%m-%d')
    else:
        previous = fonts_pb2.FamilyProto()
        with open(os.path.join(fontdir, 'METADATA.pb'), 'rb') as old_meta:
            text_format.Parse(old_meta.read(), previous)
        metadata.designer = previous.designer
        metadata.category = previous.category
        metadata.date_added = previous.date_added

    metadata.license = font_license
    metadata.subsets.extend(sorted(subset_names))

    for fontfile, family, style, weight in file_family_style_weights:
        filename = os.path.basename(fontfile)
        stem = os.path.splitext(filename)[0]
        # The file stem is the fallback for the PostScript name.
        font_psname = fonts.ExtractName(fontfile, fonts.NAME_PSNAME, stem)
        font_copyright = fonts.ExtractName(fontfile, fonts.NAME_COPYRIGHT,
                                           '???.').strip()

        font_entry = metadata.fonts.add()
        font_entry.name = family
        font_entry.style = style
        font_entry.weight = weight
        font_entry.filename = filename
        font_entry.post_script_name = font_psname
        # The stem with dashes turned into spaces is the full-name fallback.
        font_entry.full_name = fonts.ExtractName(fontfile,
                                                 fonts.NAME_FULLNAME,
                                                 stem.replace('-', ' '))
        font_entry.copyright = font_copyright

    # Collapsing per-file axis info into a set leaves exactly one element
    # when every file agrees.
    distinct_axes_info = {
        _AxisInfo(entry.file) for entry in file_family_style_weights
    }
    if len(distinct_axes_info) != 1:
        raise RuntimeError(
            'Variable axes info not matching between font files')

    (axes_info,) = distinct_axes_info
    for axis in axes_info or ():
        axis_entry = metadata.axes.add()
        axis_entry.tag = axis[0]
        axis_entry.min_value = axis[1]
        axis_entry.default_value = axis[2]
        axis_entry.max_value = axis[3]

    return metadata
Пример #10
0
def _AddLangNames(metadata_path, line_to_lang_name):
    """Read a METADATA.pb and write it back with language-name comments.

    Args:
        metadata_path: Path of the METADATA.pb file to read and write.
        line_to_lang_name: Passed to _WriteProto as its `comments` argument.
    """
    empty_family = fonts_public_pb2.FamilyProto()
    family_proto = _ReadProto(empty_family, metadata_path)
    _WriteProto(family_proto, metadata_path, comments=line_to_lang_name)
Пример #11
0
def main(argv):
    """Import sample text into family METADATA.pb files and language files.

    Two independent steps, each enabled by a flag:

    * --samples: a directory of YAML files. For every METADATA.pb path in
      argv[1:], the YAML file whose stem equals the family name with spaces
      removed supplies the family's sample_text, and the file is rewritten.
    * --udhrs: a `.yaml` file, a `.csv` file, or a directory of XML files.
      Sample text derived from it is merged into language messages, which
      are written to <FLAGS.lang>/languages/<id>.textproto.

    Args:
        argv: Command-line arguments; argv[1:] are METADATA.pb paths (only
            used by the --samples step).
    Raises:
        Exception: if --udhrs is set but is neither a .yaml file, a .csv
            file, nor a directory.
    """
    languages = _LoadLanguages(os.path.join(FLAGS.lang, 'languages'))
    # NOTE(review): `regions` is not referenced again in this function.
    regions = _LoadRegions(os.path.join(FLAGS.lang, 'regions'))

    if FLAGS.samples:
        assert len(argv) > 1, 'No METADATA.pb files specified'
        # Maps the serialized `languages: "<id>"` line to the language name;
        # handed to _WriteProto as `comments`.
        line_to_lang_name = {}
        for l in languages:
            line = 'languages: "{code}"'.format(code=languages[l].id)
            line_to_lang_name[line] = languages[l].name
        # Index the sample files by file name without extension.
        samples = {}
        for sample_filename in os.listdir(FLAGS.samples):
            key = os.path.splitext(os.path.basename(sample_filename))[0]
            samples[key] = os.path.join(FLAGS.samples, sample_filename)
        for path in argv[1:]:
            family = _ReadProto(fonts_public_pb2.FamilyProto(), path)
            # NOTE(review): the condition is hard-wired to True, so every
            # family is processed; the trailing comment holds a disabled
            # filter.
            if True:  #len(family.languages) == 0 or family.name == 'Noto Sans Tamil Supplement':
                # Sample files are looked up by family name without spaces.
                key = family.name.replace(' ', '')
                if key not in samples:
                    print('Family not found in samples: ' + family.name)
                    continue
                with open(samples[key], 'r') as f:
                    sample_data = yaml.safe_load(f)
                    # Every key below is required; a missing one raises
                    # KeyError.
                    sample_text = fonts_public_pb2.SampleTextProto()
                    sample_text.masthead_full = sample_data['masthead_full']
                    sample_text.masthead_partial = sample_data[
                        'masthead_partial']
                    sample_text.styles = sample_data['styles']
                    sample_text.tester = sample_data['tester']
                    sample_text.poster_sm = sample_data['poster_sm']
                    sample_text.poster_md = sample_data['poster_md']
                    sample_text.poster_lg = sample_data['poster_lg']
                    family.sample_text.MergeFrom(sample_text)
                    _WriteProto(family, path, comments=line_to_lang_name)

    # Everything below belongs to the --udhrs step.
    if not FLAGS.udhrs:
        return

    if FLAGS.udhrs.endswith('.yaml'):
        with open(FLAGS.udhrs, 'r') as f:
            data = yaml.safe_load(f)
            for translation, meta in data.items():
                # Only entries whose lang_full is one of these two
                # hard-coded values are imported.
                if 'lang_full' not in meta or meta['lang_full'] not in [
                        'ccp-Beng-IN', 'lad-Hebr-IL'
                ]:
                    continue
                language = meta['lang']
                if language.startswith('und-'):
                    continue
                # Prefer an explicit 'script'; otherwise take the middle
                # component of lang_full.
                script = re.search(r'.*-(.*)-.*', meta['lang_full']).group(
                    1) if 'script' not in meta else meta['script']
                key = language + '_' + script
                iso639_3 = meta['lang_639_3']
                iso15924 = script
                # Prefer 'name_udhr' when present, else 'name_lang'.
                name = meta['name_lang'] if 'name_udhr' not in meta else meta[
                    'name_udhr']
                udhr = Udhr(key=key,
                            iso639_3=iso639_3,
                            iso15924=iso15924,
                            bcp47=key,
                            direction=None,
                            ohchr=None,
                            stage=4,
                            loc=None,
                            name=name)
                udhr.LoadArticleOne(translation)

                # `language` is rebound here from the language code string
                # to the object returned by _GetLanguageForUdhr.
                language = _GetLanguageForUdhr(languages, udhr)
                # Existing sample text and autonym are never overwritten.
                if not language.HasField('sample_text'):
                    language.sample_text.MergeFrom(udhr.GetSampleTexts())
                if 'name_autonym' in meta and not language.HasField('autonym'):
                    language.autonym = meta['name_autonym'].strip()
                _WriteProto(
                    language,
                    os.path.join(FLAGS.lang, 'languages',
                                 language.id + '.textproto'))

    elif FLAGS.udhrs.endswith('.csv'):
        with open(FLAGS.udhrs, newline='') as csvfile:
            reader = csv.reader(csvfile, delimiter=',', quotechar='"')
            # Column positions are resolved from the header row by name.
            head = next(reader)
            index_id = head.index('id')
            index_name = head.index('language')
            index_historical = head.index('historical')
            index_sample = head.index('SAMPLE')
            for row in reader:
                # NOTE(review): `id` shadows the builtin within this loop.
                id = row[index_id]
                if id in languages:
                    language = languages[row[index_id]]
                else:
                    # Unknown id: create a new language message. The id must
                    # contain exactly one '_' for the unpacking below.
                    language = fonts_public_pb2.LanguageProto()
                    language.id = id
                    language.language, language.script = id.split('_')
                    language.name = row[index_name]
                # An 'X' in the historical column marks the language as
                # historical; anything else clears the field.
                historical = row[index_historical] == 'X'
                if language.historical != historical:
                    if historical:
                        language.historical = True
                    else:
                        language.ClearField('historical')
                sample = row[index_sample]
                # Empty cells and cells starting with 'http' are not used as
                # sample text.
                if sample and not sample.startswith('http'):
                    udhr = Udhr(key=id,
                                iso639_3=language.language,
                                iso15924=language.script,
                                bcp47=id,
                                direction=None,
                                ohchr=None,
                                stage=4,
                                loc=None,
                                name=None)
                    udhr.LoadArticleOne(sample)
                    if not language.HasField('sample_text'):
                        language.sample_text.MergeFrom(udhr.GetSampleTexts())
                # The language file is written for every row, whether or not
                # sample text was added.
                _WriteProto(
                    language,
                    os.path.join(FLAGS.lang, 'languages',
                                 language.id + '.textproto'))

    elif os.path.isdir(FLAGS.udhrs):
        for udhr_path in glob.glob(os.path.join(FLAGS.udhrs, '*')):
            # Skip index.xml and any file whose name starts with 'status'.
            if udhr_path.endswith('index.xml') or os.path.basename(
                    udhr_path).startswith('status'):
                continue
            udhr_data = etree.parse(udhr_path)
            head = udhr_data.getroot()
            # NOTE(review): `bcp47` is only assigned when a root attribute
            # named `{...}lang` exists. Without one it keeps the value from
            # the previous file, or is unbound on the first file.
            for name, value in head.attrib.items():
                if re.search(r'\{.*\}lang', name):
                    bcp47 = value.replace('-', '_')
            udhr = Udhr(key=head.get('key'),
                        iso639_3=head.get('iso639-3'),
                        iso15924=head.get('iso15924'),
                        bcp47=bcp47,
                        direction=head.get('dir'),
                        ohchr=None,
                        stage=4,
                        loc=None,
                        name=head.get('n'))
            udhr.Parse(udhr_data)

            language = _GetLanguageForUdhr(languages, udhr)
            # Only languages that are not already in `languages` and have no
            # sample text are written.
            if language.id in languages or language.HasField('sample_text'):
                continue
            language.sample_text.MergeFrom(udhr.GetSampleTexts())
            _WriteProto(
                language,
                os.path.join(FLAGS.lang, 'languages',
                             language.id + '.textproto'))

    else:
        raise Exception('Unsupported input type for --udhrs: ' + FLAGS.udhrs)
Пример #12
0
def family_dir_name(path):
    """Return the family name stored in the METADATA.pb inside ``path``.

    Args:
        path: A pathlib-style directory path; ``path / "METADATA.pb"`` must
            exist.
    Returns:
        The ``name`` field of the family message read from that file.
    """
    metadata_path = path / "METADATA.pb"
    assert metadata_path.exists()
    family = read_proto(metadata_path, fonts_pb2.FamilyProto())
    return family.name
Пример #13
0
def _MakeMetadata(fontdir, is_new):
    """Builds the METADATA.pb message for the font files in a directory.

    Args:
        fontdir: Directory containing font files for which we want metadata.
        is_new: True for a new family. When False, designer, category,
            date_added, subsets, languages and fallbacks are carried over
            from the METADATA.pb already in fontdir.
    Returns:
        A fonts_pb2.FamilyProto message, the METADATA.pb structure.
    Raises:
        RuntimeError: If the variable font axes info differs between font
            files of the same family.
    """
    file_family_style_weights = _FileFamilyStyleWeights(fontdir)
    first_entry = file_family_style_weights[0]
    first_file = first_entry.file
    font_license = fonts.LicenseFromPath(fontdir)

    metadata = fonts_pb2.FamilyProto()
    metadata.name = first_entry.family

    # Subsets are detected from the first font file only.
    subsets_in_font = [
        entry[0]
        for entry in SubsetsInFont(first_file, FLAGS.min_pct,
                                   FLAGS.min_pct_ext)
    ]

    if is_new:
        metadata.designer = 'UNKNOWN'
        metadata.category.append('SANS_SERIF')
        metadata.date_added = time.strftime('%Y-%m-%d')
        subsets = ['menu'] + subsets_in_font
    else:
        old_metadata = fonts.ReadProto(fonts_pb2.FamilyProto(),
                                       os.path.join(fontdir, 'METADATA.pb'))
        metadata.designer = old_metadata.designer
        metadata.category[:] = old_metadata.category
        metadata.date_added = old_metadata.date_added
        # Keep every subset already declared and add newly detected ones.
        subsets = set(old_metadata.subsets) | set(subsets_in_font)
        metadata.languages[:] = old_metadata.languages
        metadata.fallbacks.extend(old_metadata.fallbacks)

    metadata.license = font_license
    metadata.subsets.extend(sorted(subsets))

    for fontfile, family, style, weight in file_family_style_weights:
        filename = os.path.basename(fontfile)
        stem = os.path.splitext(filename)[0]
        # The file stem is the fallback for the PostScript name.
        font_psname = fonts.ExtractName(fontfile, fonts.NAME_PSNAME, stem)
        font_copyright = fonts.ExtractName(fontfile, fonts.NAME_COPYRIGHT,
                                           '???.').strip()

        font_entry = metadata.fonts.add()
        font_entry.name = family
        font_entry.style = style
        font_entry.weight = weight
        font_entry.filename = filename
        font_entry.post_script_name = font_psname
        # The stem with dashes turned into spaces is the full-name fallback.
        font_entry.full_name = fonts.ExtractName(fontfile,
                                                 fonts.NAME_FULLNAME,
                                                 stem.replace('-', ' '))
        font_entry.copyright = font_copyright

    if not metadata.languages:
        # No languages carried over: derive them from the exemplar font.
        exemplar = fonts.GetExemplarFont(metadata)
        exemplar_font = ttLib.TTFont(os.path.join(fontdir, exemplar.filename))
        languages = fonts.LoadLanguages(os.path.join(FLAGS.lang, 'languages'))
        supported = fonts.SupportedLanguages(exemplar_font, languages)
        metadata.languages.extend(sorted(lang.id for lang in supported))

    # Collapsing per-file axis info into a set leaves exactly one element
    # when every file agrees.
    distinct_axes_info = {
        _AxisInfo(entry.file) for entry in file_family_style_weights
    }
    if len(distinct_axes_info) != 1:
        raise RuntimeError(
            'Variable axes info not matching between font files')

    (axes_info,) = distinct_axes_info
    for axis in axes_info or ():
        axis_entry = metadata.axes.add()
        axis_entry.tag = axis[0]
        axis_entry.min_value = axis[1]
        axis_entry.max_value = axis[2]

    return metadata