def main(argv):
    """Runs one of three modes selected by FLAGS.

    * FLAGS.report: writes the insights report for the METADATA.pb files in
      argv[1:] into FLAGS.out.
    * FLAGS.sample_text_audit: collects the scripts not used by any language
      of the given families and passes them to _SampleTextAudit.
    * otherwise: adds language support metadata to every METADATA.pb file
      in argv[1:].

    Args:
      argv: Command-line arguments; argv[1:] are METADATA.pb paths.
    """
    languages = _LoadLanguages(os.path.join(FLAGS.lang, 'languages'))
    scripts = _LoadScripts(os.path.join(FLAGS.lang, 'scripts'))
    # Not read below; the load call itself is retained as in the original.
    regions = _LoadRegions(os.path.join(FLAGS.lang, 'regions'))

    if FLAGS.report:
        assert len(argv) > 1, 'No METADATA.pb files specified'
        assert FLAGS.out is not None, 'No output dir specified (--out)'
        print('Writing insights report...')
        _WriteReport(argv[1:], FLAGS.out, languages)
        return

    if FLAGS.sample_text_audit:
        assert FLAGS.out is not None, 'No output dir specified (--out)'
        print('Auditing sample text')
        # Every script referenced by a language of any listed family.
        seen_scripts = {
            languages[code].script
            for metadata_path in argv[1:]
            for code in _ReadProto(fonts_public_pb2.FamilyProto(),
                                   metadata_path).languages
        }
        unused_scripts = {s for s in scripts if s not in seen_scripts}
        _SampleTextAudit(FLAGS.out, languages, scripts, unused_scripts)
        return

    assert len(argv) > 1, 'No METADATA.pb files specified'
    # Maps the textproto line of a language to its name; handed on as
    # the comment table for the rewritten METADATA.pb files.
    line_to_lang_name = {
        'languages: "{code}"'.format(code=languages[key].id):
            languages[key].name
        for key in languages
    }
    for metadata_path in argv[1:]:
        _AddLanguageSupportMetadata(metadata_path, languages, scripts,
                                    line_to_lang_name)
def Metadata(file_or_dir):
    """Loads a METADATA.pb file into a fonts_pb2.FamilyProto message.

    Accepts either the path of a file named METADATA.pb, or a directory
    that contains such a file.

    Args:
      file_or_dir: A file or directory.

    Returns:
      The FamilyProto message merged from the METADATA.pb text.

    Raises:
      ValueError: if file_or_dir isn't a METADATA.pb file or dir containing
        one.
    """
    names_metadata_file = (
        os.path.isfile(file_or_dir)
        and os.path.basename(file_or_dir) == 'METADATA.pb')

    if names_metadata_file:
        metadata_file = file_or_dir
    else:
        if not os.path.isdir(file_or_dir):
            raise ValueError(
                '%s is neither METADATA.pb file or a directory' % file_or_dir)
        metadata_file = os.path.join(file_or_dir, 'METADATA.pb')
        if not os.path.isfile(metadata_file):
            raise ValueError('No METADATA.pb in %s' % file_or_dir)

    msg = fonts_pb2.FamilyProto()
    with codecs.open(metadata_file, encoding='utf-8') as handle:
        content = handle.read()
        text_format.Merge(content, msg)
    return msg
def _AddGlyphMetadata(metadata_path):
    """Replaces the sample_glyphs of a METADATA.pb file and rewrites it.

    The glyph sections come from _GetSampleGlyphs, called on the family's
    exemplar font file, which is resolved next to the METADATA.pb file.

    Args:
      metadata_path: Path of the METADATA.pb file to update in place.
    """
    family = _ReadProto(fonts_public_pb2.FamilyProto(), metadata_path)
    exemplar = _GetExemplarFont(family)
    family_dir = os.path.dirname(metadata_path)
    exemplar_path = os.path.join(family_dir, exemplar.filename)

    # Drop whatever was recorded before so only fresh sections remain.
    family.sample_glyphs.clear()
    for section_name, section_glyphs in _GetSampleGlyphs(exemplar_path):
        family.sample_glyphs[section_name] = section_glyphs

    _WriteProto(family, metadata_path)
def _AddLanguageSupportMetadata(metadata_path, languages, scripts,
                                line_to_lang_name):
    """Fills in the languages field of a METADATA.pb that has none.

    Files that already list at least one language are left untouched.
    Otherwise the ids of the languages reported by _SupportedLanguages for
    the exemplar font are added in sorted order and the file is rewritten.

    Args:
      metadata_path: Path of the METADATA.pb file to update in place.
      languages: Language collection passed on to _SupportedLanguages.
      scripts: Not read by this function.
      line_to_lang_name: Passed to _WriteProto as its `comments` argument.
    """
    family = _ReadProto(fonts_public_pb2.FamilyProto(), metadata_path)
    if family.languages:
        return

    exemplar = _GetExemplarFont(family)
    exemplar_path = os.path.join(
        os.path.dirname(metadata_path), exemplar.filename)
    language_ids = sorted(
        lang.id for lang in _SupportedLanguages(exemplar_path, languages))
    family.languages.extend(language_ids)
    _WriteProto(family, metadata_path, comments=line_to_lang_name)
def main(argv):
    """Runs one of three modes selected by FLAGS.

    * FLAGS.report: writes the insights report for the METADATA.pb files in
      argv[1:] into FLAGS.out.
    * FLAGS.sample_text_audit: collects the scripts not used by any language
      of the given families and passes them to _SampleTextAudit.
    * otherwise: for every METADATA.pb in argv[1:] that lists no languages,
      adds the sorted ids of the languages supported by its exemplar font
      and rewrites the file.

    Args:
      argv: Command-line arguments; argv[1:] are METADATA.pb paths.
    """
    languages = fonts.LoadLanguages(os.path.join(FLAGS.lang, 'languages'))
    scripts = fonts.LoadScripts(os.path.join(FLAGS.lang, 'scripts'))

    if FLAGS.report:
        assert len(argv) > 1, 'No METADATA.pb files specified'
        assert FLAGS.out is not None, 'No output dir specified (--out)'
        print('Writing insights report...')
        _WriteReport(argv[1:], FLAGS.out, languages)
        return

    if FLAGS.sample_text_audit:
        assert FLAGS.out is not None, 'No output dir specified (--out)'
        print('Auditing sample text')
        # Every script referenced by a language of any listed family.
        seen_scripts = {
            languages[code].script
            for metadata_path in argv[1:]
            for code in fonts.ReadProto(fonts_public_pb2.FamilyProto(),
                                        metadata_path).languages
        }
        unused_scripts = {s for s in scripts if s not in seen_scripts}
        _SampleTextAudit(FLAGS.out, languages, scripts, unused_scripts)
        return

    assert len(argv) > 1, 'No METADATA.pb files specified'
    language_comments = fonts.LanguageComments(languages)
    for metadata_path in argv[1:]:
        family_metadata = fonts.ReadProto(fonts_public_pb2.FamilyProto(),
                                          metadata_path)
        # Families that already declare languages are left untouched.
        if family_metadata.languages:
            continue

        family_dir = os.path.dirname(metadata_path)
        exemplar = fonts.GetExemplarFont(family_metadata)
        exemplar_font = TTFont(os.path.join(family_dir, exemplar.filename))
        language_ids = sorted(
            lang.id
            for lang in fonts.SupportedLanguages(exemplar_font, languages))
        family_metadata.languages.extend(language_ids)
        fonts.WriteProto(family_metadata, metadata_path,
                         comments=language_comments)
def families_from_file(fp):
    """Convert to_sandbox.txt and to_production.txt files to a list of family names.

    The file is read as a whitespace-separated list of family directories,
    each resolved relative to the directory that contains `fp`.

    Args:
        fp: Path of the text file listing family directories.

    Returns:
        A list with the `name` field of each directory's METADATA.pb, in
        the order the directories are listed. Empty when the file lists
        nothing.

    Raises:
        FileNotFoundError: If any listed directory has no METADATA.pb file;
            the message names every missing file.
    """
    with open(fp) as doc:
        family_dirs = doc.read().split()

    base_dir = Path(fp).parent
    metadata_files = [base_dir / d / 'METADATA.pb' for d in family_dirs]

    # Check everything up front so the error reports all missing files at
    # once rather than failing on the first one.
    missing_files = [str(f) for f in metadata_files if not f.is_file()]
    if missing_files:
        raise FileNotFoundError(
            "Following METADATA.pbs files are missing:\n{}".format(
                "\n".join(missing_files)
            )
        )
    return [read_proto(f, fonts_pb2.FamilyProto()).name for f in metadata_files]
def main(argv):
    """Marks families whose name matches NOTO_FAMILY_NAME as Noto.

    For each METADATA.pb in argv[1:] whose family name matches, sets
    `is_noto` to True and rewrites the file. When FLAGS.preview is set the
    matching names are printed instead and nothing is written.

    Args:
      argv: Command-line arguments; argv[1:] are METADATA.pb paths.
    """
    assert len(argv) > 1, 'No METADATA.pb files specified'

    if FLAGS.preview:
        for banner_line in (
                'Running in preview mode. No changes will be made.',
                'The names of families detected as part of the Noto',
                'collection will be printed below.'):
            print(banner_line)

    for metadata_path in argv[1:]:
        family = _ReadProto(fonts_public_pb2.FamilyProto(), metadata_path)
        if not NOTO_FAMILY_NAME.search(family.name):
            continue
        if FLAGS.preview:
            print(family.name)
            continue
        family.is_noto = True
        _WriteProto(family, metadata_path)
def _WriteReport(metadata_paths, out_dir, languages):
    """Writes support.csv listing supported languages that lack sample text.

    Every language code listed by any of the given families is looked up in
    `languages`; those whose entry has no `sample_text` field set get one
    row (id, name, language, script, population) below the header row.
    Each language is reported once, in first-seen order.

    Args:
      metadata_paths: Paths of the METADATA.pb files to inspect.
      out_dir: Directory in which support.csv is written.
      languages: Mapping from language code to language proto.

    Raises:
      KeyError: If a family lists a code missing from `languages` (assuming
        `languages` is a dict-like mapping).
    """
    rows = [[
        'id', 'name', 'lang', 'script', 'population', 'ec_base',
        'ec_auxiliary', 'ec_marks', 'ec_numerals', 'ec_punctuation',
        'ec_index', 'st_fallback', 'st_fallback_name', 'st_masthead_full',
        'st_masthead_partial', 'st_styles', 'st_tester', 'st_poster_sm',
        'st_poster_md', 'st_poster_lg', 'st_specimen_48', 'st_specimen_36',
        'st_specimen_32', 'st_specimen_21', 'st_specimen_16'
    ]]

    # Keyed by language code so a language shared by several families is
    # reported only once.
    supported_without_sample_text = {}
    for metadata_path in metadata_paths:
        family = fonts.ReadProto(fonts_public_pb2.FamilyProto(),
                                 metadata_path)
        for lang_code in family.languages:
            language = languages[lang_code]
            if (not language.HasField('sample_text')
                    and lang_code not in supported_without_sample_text):
                supported_without_sample_text[lang_code] = language

    rows.extend(
        [lang.id, lang.name, lang.language, lang.script, lang.population]
        for lang in supported_without_sample_text.values())

    path = os.path.join(out_dir, 'support.csv')
    _WriteCsv(path, rows)
def _MakeMetadata(fontdir, is_new):
    """Builds the METADATA.pb message for the font files in a directory.

    Args:
      fontdir: Directory containing font files for which we want metadata.
      is_new: True for a new family; False when fontdir already holds a
        METADATA.pb whose designer, category and date_added are carried
        over.

    Returns:
      A fonts_pb2.FamilyProto message holding the METADATA.pb structure.

    Raises:
      RuntimeError: If the variable font axes info differs between font
        files of same family.
    """
    font_entries = _FileFamilyStyleWeights(fontdir)
    lead_entry = font_entries[0]

    # Subsets are detected from the first font file only.
    detected_subsets = fonts.SubsetsInFont(lead_entry.file, FLAGS.min_pct,
                                           FLAGS.min_pct_ext)
    subsets = ['menu']
    subsets.extend(entry[0] for entry in detected_subsets)

    previous_metadata_path = os.path.join(fontdir, 'METADATA.pb')
    font_license = fonts.LicenseFromPath(fontdir)

    metadata = fonts_pb2.FamilyProto()
    metadata.name = lead_entry.family

    if is_new:
        metadata.designer = 'UNKNOWN'
        metadata.category = 'SANS_SERIF'
        metadata.date_added = time.strftime('%Y-%m-%d')
    else:
        previous = fonts_pb2.FamilyProto()
        with open(previous_metadata_path, 'rb') as previous_file:
            text_format.Parse(previous_file.read(), previous)
        metadata.designer = previous.designer
        metadata.category = previous.category
        metadata.date_added = previous.date_added

    metadata.license = font_license
    metadata.subsets.extend(sorted(subsets))

    for fontfile, family, style, weight in font_entries:
        filename = os.path.basename(fontfile)
        stem = os.path.splitext(filename)[0]
        # The file name stem is the fallback for a missing PostScript name.
        ps_name = fonts.ExtractName(fontfile, fonts.NAME_PSNAME, stem)
        copyright_text = fonts.ExtractName(fontfile, fonts.NAME_COPYRIGHT,
                                           '???.').strip()

        font_metadata = metadata.fonts.add()
        font_metadata.name = family
        font_metadata.style = style
        font_metadata.weight = weight
        font_metadata.filename = filename
        font_metadata.post_script_name = ps_name
        font_metadata.full_name = fonts.ExtractName(
            fontfile, fonts.NAME_FULLNAME, stem.replace('-', ' '))
        font_metadata.copyright = copyright_text

    # A set collapses identical axis info, so more than one member means
    # the font files disagree.
    distinct_axes_info = {_AxisInfo(entry.file) for entry in font_entries}
    if len(distinct_axes_info) != 1:
        raise RuntimeError(
            'Variable axes info not matching between font files')

    for axes_info in distinct_axes_info:
        if not axes_info:
            continue
        for axis in axes_info:
            axis_proto = metadata.axes.add()
            axis_proto.tag = axis[0]
            axis_proto.min_value = axis[1]
            axis_proto.default_value = axis[2]
            axis_proto.max_value = axis[3]

    return metadata
def _AddLangNames(metadata_path, line_to_lang_name):
    """Rewrites a METADATA.pb file, passing language-name comments along.

    The proto content is read and written back unchanged; only the
    `comments` argument handed to _WriteProto is supplied here.

    Args:
      metadata_path: Path of the METADATA.pb file to rewrite in place.
      line_to_lang_name: Passed to _WriteProto as its `comments` argument.
    """
    _WriteProto(
        _ReadProto(fonts_public_pb2.FamilyProto(), metadata_path),
        metadata_path,
        comments=line_to_lang_name)
# main(argv): command-line entry point with two independent stages.
#
# Stage 1 (only when FLAGS.samples is set): for each METADATA.pb path in
# argv[1:], looks up a sample file in the FLAGS.samples directory keyed by
# the family name with spaces removed, loads it with yaml.safe_load, copies
# its masthead_full / masthead_partial / styles / tester / poster_* entries
# into a SampleTextProto, merges that into family.sample_text and rewrites
# the METADATA.pb. Families with no matching sample file are reported with
# print() and skipped.
#
# Stage 2 (only when FLAGS.udhrs is set; otherwise main returns): builds Udhr
# objects and merges their sample texts into language protos, which are
# written to <FLAGS.lang>/languages/<id>.textproto. FLAGS.udhrs may be a
# '.yaml' file, a '.csv' file, or a directory (globbed with '*', skipping
# paths ending in 'index.xml' or whose basename starts with 'status'); any
# other value raises Exception.
#
# NOTE(review): this definition is whitespace-collapsed here, so the nesting
# of statements cannot be read from indentation — confirm against the
# formatted original before changing any control flow.
# NOTE(review): `regions` is assigned but not read in the visible code.
# NOTE(review): the `if True:` guard has its original condition commented
# out, so stage 1 presumably runs for every family — confirm this is intended.
# NOTE(review): the YAML branch skips every entry whose 'lang_full' is not
# 'ccp-Beng-IN' or 'lad-Hebr-IL'; this looks like a temporary filter —
# confirm.
# NOTE(review): in the directory branch `bcp47` is assigned only when an
# attribute name matches r'\{.*\}lang'; if none matches it would presumably
# be unbound on the first file, or stale from a previous file — confirm.
# NOTE(review): `id` in the CSV branch shadows the builtin of the same name.
def main(argv): languages = _LoadLanguages(os.path.join(FLAGS.lang, 'languages')) regions = _LoadRegions(os.path.join(FLAGS.lang, 'regions')) if FLAGS.samples: assert len(argv) > 1, 'No METADATA.pb files specified' line_to_lang_name = {} for l in languages: line = 'languages: "{code}"'.format(code=languages[l].id) line_to_lang_name[line] = languages[l].name samples = {} for sample_filename in os.listdir(FLAGS.samples): key = os.path.splitext(os.path.basename(sample_filename))[0] samples[key] = os.path.join(FLAGS.samples, sample_filename) for path in argv[1:]: family = _ReadProto(fonts_public_pb2.FamilyProto(), path) if True: #len(family.languages) == 0 or family.name == 'Noto Sans Tamil Supplement': key = family.name.replace(' ', '') if key not in samples: print('Family not found in samples: ' + family.name) continue with open(samples[key], 'r') as f: sample_data = yaml.safe_load(f) sample_text = fonts_public_pb2.SampleTextProto() sample_text.masthead_full = sample_data['masthead_full'] sample_text.masthead_partial = sample_data[ 'masthead_partial'] sample_text.styles = sample_data['styles'] sample_text.tester = sample_data['tester'] sample_text.poster_sm = sample_data['poster_sm'] sample_text.poster_md = sample_data['poster_md'] sample_text.poster_lg = sample_data['poster_lg'] family.sample_text.MergeFrom(sample_text) _WriteProto(family, path, comments=line_to_lang_name) if not FLAGS.udhrs: return if FLAGS.udhrs.endswith('.yaml'): with open(FLAGS.udhrs, 'r') as f: data = yaml.safe_load(f) for translation, meta in data.items(): if 'lang_full' not in meta or meta['lang_full'] not in [ 'ccp-Beng-IN', 'lad-Hebr-IL' ]: continue language = meta['lang'] if language.startswith('und-'): continue script = re.search(r'.*-(.*)-.*', meta['lang_full']).group( 1) if 'script' not in meta else meta['script'] key = language + '_' + script iso639_3 = meta['lang_639_3'] iso15924 = script name = meta['name_lang'] if 'name_udhr' not in meta else meta[ 'name_udhr'] udhr = Udhr(key=key, 
iso639_3=iso639_3, iso15924=iso15924, bcp47=key, direction=None, ohchr=None, stage=4, loc=None, name=name) udhr.LoadArticleOne(translation) language = _GetLanguageForUdhr(languages, udhr) if not language.HasField('sample_text'): language.sample_text.MergeFrom(udhr.GetSampleTexts()) if 'name_autonym' in meta and not language.HasField('autonym'): language.autonym = meta['name_autonym'].strip() _WriteProto( language, os.path.join(FLAGS.lang, 'languages', language.id + '.textproto')) elif FLAGS.udhrs.endswith('.csv'): with open(FLAGS.udhrs, newline='') as csvfile: reader = csv.reader(csvfile, delimiter=',', quotechar='"') head = next(reader) index_id = head.index('id') index_name = head.index('language') index_historical = head.index('historical') index_sample = head.index('SAMPLE') for row in reader: id = row[index_id] if id in languages: language = languages[row[index_id]] else: language = fonts_public_pb2.LanguageProto() language.id = id language.language, language.script = id.split('_') language.name = row[index_name] historical = row[index_historical] == 'X' if language.historical != historical: if historical: language.historical = True else: language.ClearField('historical') sample = row[index_sample] if sample and not sample.startswith('http'): udhr = Udhr(key=id, iso639_3=language.language, iso15924=language.script, bcp47=id, direction=None, ohchr=None, stage=4, loc=None, name=None) udhr.LoadArticleOne(sample) if not language.HasField('sample_text'): language.sample_text.MergeFrom(udhr.GetSampleTexts()) _WriteProto( language, os.path.join(FLAGS.lang, 'languages', language.id + '.textproto')) elif os.path.isdir(FLAGS.udhrs): for udhr_path in glob.glob(os.path.join(FLAGS.udhrs, '*')): if udhr_path.endswith('index.xml') or os.path.basename( udhr_path).startswith('status'): continue udhr_data = etree.parse(udhr_path) head = udhr_data.getroot() for name, value in head.attrib.items(): if re.search(r'\{.*\}lang', name): bcp47 = value.replace('-', '_') udhr = 
Udhr(key=head.get('key'), iso639_3=head.get('iso639-3'), iso15924=head.get('iso15924'), bcp47=bcp47, direction=head.get('dir'), ohchr=None, stage=4, loc=None, name=head.get('n')) udhr.Parse(udhr_data) language = _GetLanguageForUdhr(languages, udhr) if language.id in languages or language.HasField('sample_text'): continue language.sample_text.MergeFrom(udhr.GetSampleTexts()) _WriteProto( language, os.path.join(FLAGS.lang, 'languages', language.id + '.textproto')) else: raise Exception('Unsupported input type for --udhrs: ' + FLAGS.udhrs)
def family_dir_name(path):
    """Return the `name` field of the METADATA.pb inside directory `path`.

    Args:
        path: A pathlib-style path of a family directory (joined with the
            `/` operator).

    Returns:
        The family name stored in the directory's METADATA.pb.

    Raises:
        AssertionError: If the directory has no METADATA.pb.
    """
    metadata_file = path / "METADATA.pb"
    assert metadata_file.exists()
    family = read_proto(metadata_file, fonts_pb2.FamilyProto())
    return family.name
def _MakeMetadata(fontdir, is_new):
    """Builds the METADATA.pb message for the font files in a directory.

    For an existing family the designer, category, date_added, subsets,
    languages and fallbacks of the current METADATA.pb are carried over.
    When no languages end up set, they are derived from the exemplar font.

    Args:
      fontdir: Directory containing font files for which we want metadata.
      is_new: True for a new family; False when fontdir already holds a
        METADATA.pb to carry values over from.

    Returns:
      A fonts_pb2.FamilyProto message, the METADATA.pb structure.

    Raises:
      RuntimeError: If the variable font axes info differs between font
        files of same family.
    """
    font_entries = _FileFamilyStyleWeights(fontdir)
    lead_entry = font_entries[0]
    previous_metadata_path = os.path.join(fontdir, 'METADATA.pb')
    font_license = fonts.LicenseFromPath(fontdir)

    metadata = fonts_pb2.FamilyProto()
    metadata.name = lead_entry.family

    # Subsets are detected from the first font file only.
    subsets_in_font = [
        entry[0] for entry in SubsetsInFont(lead_entry.file, FLAGS.min_pct,
                                            FLAGS.min_pct_ext)
    ]

    if is_new:
        metadata.designer = 'UNKNOWN'
        metadata.category.append('SANS_SERIF')
        metadata.date_added = time.strftime('%Y-%m-%d')
        subsets = ['menu'] + subsets_in_font
    else:
        previous = fonts.ReadProto(fonts_pb2.FamilyProto(),
                                   previous_metadata_path)
        metadata.designer = previous.designer
        metadata.category[:] = previous.category
        metadata.date_added = previous.date_added
        # Keep subsets already declared and add any newly detected ones.
        subsets = set(previous.subsets).union(subsets_in_font)
        metadata.languages[:] = previous.languages
        metadata.fallbacks.extend(previous.fallbacks)

    metadata.license = font_license
    metadata.subsets.extend(sorted(subsets))

    for fontfile, family, style, weight in font_entries:
        filename = os.path.basename(fontfile)
        stem = os.path.splitext(filename)[0]
        # The file name stem is the fallback for a missing PostScript name.
        ps_name = fonts.ExtractName(fontfile, fonts.NAME_PSNAME, stem)
        copyright_text = fonts.ExtractName(fontfile, fonts.NAME_COPYRIGHT,
                                           '???.').strip()

        font_metadata = metadata.fonts.add()
        font_metadata.name = family
        font_metadata.style = style
        font_metadata.weight = weight
        font_metadata.filename = filename
        font_metadata.post_script_name = ps_name
        font_metadata.full_name = fonts.ExtractName(
            fontfile, fonts.NAME_FULLNAME, stem.replace('-', ' '))
        font_metadata.copyright = copyright_text

    if not metadata.languages:
        exemplar = fonts.GetExemplarFont(metadata)
        exemplar_font = ttLib.TTFont(os.path.join(fontdir, exemplar.filename))
        languages = fonts.LoadLanguages(os.path.join(FLAGS.lang, 'languages'))
        language_ids = sorted(
            lang.id
            for lang in fonts.SupportedLanguages(exemplar_font, languages))
        metadata.languages.extend(language_ids)

    # A set collapses identical axis info, so more than one member means
    # the font files disagree.
    distinct_axes_info = {_AxisInfo(entry.file) for entry in font_entries}
    if len(distinct_axes_info) != 1:
        raise RuntimeError(
            'Variable axes info not matching between font files')

    for axes_info in distinct_axes_info:
        if not axes_info:
            continue
        for axis in axes_info:
            axis_proto = metadata.axes.add()
            axis_proto.tag = axis[0]
            axis_proto.min_value = axis[1]
            axis_proto.max_value = axis[2]

    return metadata