# NOTE(review): this line is a whitespace-collapsed, truncated excerpt of a
# script; it is cut off inside the `category_sets` list literal.
# Visible behaviour: reads ucd_rootdir and ucd_version from sys.argv[1] and
# sys.argv[2]; fills unicode_chars with codepoint -> data['GeneralCategory']
# for every record of the 'UnicodeData' file; when '--with-csur' is present in
# sys.argv, does the same for the 'Klingon' file under 'data/csur'; then begins
# category_sets, a list of (ucd.CodeRange, category-or-None, description)
# tuples.
# but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. import os import sys import ucd ucd_rootdir = sys.argv[1] ucd_version = sys.argv[2] unicode_chars = {} for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): for codepoint in data['CodePoint']: unicode_chars[codepoint] = data['GeneralCategory'] if '--with-csur' in sys.argv: for csur in ['Klingon']: for data in ucd.parse_ucd_data('data/csur', csur): for codepoint in data['CodePoint']: unicode_chars[codepoint] = data['GeneralCategory'] # This map is a combination of the information in the UnicodeData and Blocks # data files. It is intended to reduce the number of character tables that # need to be generated. category_sets = [ (ucd.CodeRange('000000..00D7FF'), None, 'Multiple Blocks'), (ucd.CodeRange('00D800..00DFFF'), 'Cs', 'Surrogates'), (ucd.CodeRange('00E000..00F7FF'), 'Co', 'Private Use Area'),
# NOTE(review): this line is a whitespace-collapsed, truncated excerpt of a
# script; it is cut off inside the `script_sets` list literal.
# Visible behaviour: reads ucd_rootdir and ucd_version from sys.argv[1] and
# sys.argv[2]; fills unicode_chars with codepoint -> data['Script'] for every
# codepoint in data['Range'] of the 'Scripts' file; when '--with-csur' is
# present in sys.argv, does the same per data['CodePoint'] for the 'Klingon'
# file under 'data/csur'; then begins script_sets, a list of
# (ucd.CodeRange, script-or-None, description) tuples.
# NOTE(review): the inline comment below mentions the UnicodeData file, but the
# visible code reads 'Scripts' -- confirm the comment is still accurate.
# but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. import os import sys import ucd ucd_rootdir = sys.argv[1] ucd_version = sys.argv[2] unicode_chars = {} for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): for codepoint in data['Range']: unicode_chars[codepoint] = data['Script'] if '--with-csur' in sys.argv: for csur in ['Klingon']: for data in ucd.parse_ucd_data('data/csur', csur): for codepoint in data['CodePoint']: unicode_chars[codepoint] = data['Script'] # This map is a combination of the information in the UnicodeData and Blocks # data files. It is intended to reduce the number of character tables that # need to be generated. script_sets = [ (ucd.CodeRange('000000..00D7FF'), None, 'Multiple Blocks'), (ucd.CodeRange('00D800..00F7FF'), 'Zzzz', 'Surrogates / Private Use Area'), (ucd.CodeRange('00F800..02FAFF'), None, 'Multiple Blocks'),
# NOTE(review): this line is a whitespace-collapsed, truncated excerpt of a
# script; it is cut off right after the `for csur in ['Klingon']:` header.
# Visible behaviour: reads ucd_rootdir from sys.argv[1]; stores each
# 'UnicodeData' record in unicode_chars keyed by codepoint; for each
# (directory, file) pair in `properties`, sets
# unicode_chars[codepoint][data['Property']] = 1 for every codepoint in
# data['Range'], creating a {'CodePoint': codepoint} record first when the
# codepoint is not yet present; then copies data['Script'] from the 'Scripts'
# file into each record.
# NOTE(review): unlike the property loop, the Scripts loop has no KeyError
# fallback -- confirm every Scripts codepoint already has a record.
import os import sys import ucd ucd_rootdir = sys.argv[1] emoji_rootdir = 'data/emoji' csur_rootdir = 'data/csur' null = ucd.CodePoint('0000') properties = [(ucd_rootdir, 'PropList'), (ucd_rootdir, 'DerivedCoreProperties'), (emoji_rootdir, 'emoji-data'), ('data/espeak-ng', 'PropList')] unicode_chars = {} for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): for codepoint in data['CodePoint']: unicode_chars[codepoint] = data for propdir, propfile in properties: for data in ucd.parse_ucd_data(propdir, propfile): for codepoint in data['Range']: try: unicode_chars[codepoint][data['Property']] = 1 except KeyError: unicode_chars[codepoint] = {'CodePoint': codepoint} unicode_chars[codepoint][data['Property']] = 1 for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): for codepoint in data['Range']: unicode_chars[codepoint]['Script'] = data['Script'] if '--with-csur' in sys.argv: for csur in ['Klingon']:
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.

import os
import sys

import ucd

# Directory containing the Unicode data files, taken from the first
# command-line argument.
ucd_rootdir = sys.argv[1]
# Directory containing the optional CSUR data files (used with --with-csur).
csur_rootdir = 'data/csur'

# Maps each codepoint to its record from the 'UnicodeData' file.  Every record
# gains a 'Properties' list here and a 'Script' entry further down.
unicode_chars = {}
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
    for codepoint in data['CodePoint']:
        unicode_chars[codepoint] = data
        unicode_chars[codepoint]['Properties'] = []

# Only the White_Space property from 'PropList' is recorded on the records.
for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'):
    if data['Property'] in ['White_Space']:
        for codepoint in data['Range']:
            unicode_chars[codepoint]['Properties'].append(data['Property'])

# Attach the script of every codepoint listed in the 'Scripts' file.
for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
    for codepoint in data['Range']:
        unicode_chars[codepoint]['Script'] = data['Script']

if '--with-csur' in sys.argv:
    for csur in ['Klingon']:
        # Use csur_rootdir instead of repeating the 'data/csur' literal.
        for data in ucd.parse_ucd_data(csur_rootdir, csur):
            for codepoint in data['CodePoint']:
                # A CSUR record without a title-case mapping maps to itself.
                if 'TitleCase' not in data:
                    data['TitleCase'] = codepoint
# NOTE(review): this line is a whitespace-collapsed, truncated excerpt of a
# script; it is cut off right after the `for csur in ['Klingon']:` header.
# Visible behaviour: reads ucd_rootdir from sys.argv[1]; stores each
# 'UnicodeData' record in unicode_chars keyed by codepoint; for each
# (directory, file) pair in `properties`, sets
# unicode_chars[codepoint][data['Property']] = 1 for every codepoint in
# data['Range'], creating a {'CodePoint': codepoint} record first when the
# codepoint is not yet present; then copies data['Script'] from the 'Scripts'
# file into each record.
# NOTE(review): unlike the property loop, the Scripts loop has no KeyError
# fallback -- confirm every Scripts codepoint already has a record.
import sys import ucd ucd_rootdir = sys.argv[1] csur_rootdir = 'data/csur' null = ucd.CodePoint('0000') properties = [ (ucd_rootdir, 'PropList'), (ucd_rootdir, 'DerivedCoreProperties'), ('data/espeak-ng', 'PropList') ] unicode_chars = {} for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): for codepoint in data['CodePoint']: unicode_chars[codepoint] = data for propdir, propfile in properties: for data in ucd.parse_ucd_data(propdir, propfile): for codepoint in data['Range']: try: unicode_chars[codepoint][data['Property']] = 1 except KeyError: unicode_chars[codepoint] = {'CodePoint': codepoint} unicode_chars[codepoint][data['Property']] = 1 for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): for codepoint in data['Range']: unicode_chars[codepoint]['Script'] = data['Script'] if '--with-csur' in sys.argv: for csur in ['Klingon']:
# NOTE(review): this line is a whitespace-collapsed, truncated excerpt of a
# script; it is cut off inside the triple-quoted string passed to
# sys.stdout.write.
# Visible behaviour: reads ucd_rootdir and ucd_version from sys.argv[1] and
# sys.argv[2]; for every 'UnicodeData' record whose LowerCase, UpperCase or
# TitleCase field differs from ucd.CodePoint('0000'), stores
# (LowerCase, UpperCase, TitleCase) in unicode_chars keyed by
# data['CodePoint']; when run as __main__, starts writing a C-style comment
# header to stdout.
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. import os import sys import ucd ucd_rootdir = sys.argv[1] ucd_version = sys.argv[2] unicode_chars = {} null = ucd.CodePoint('0000') for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): if data['LowerCase'] != null or data['UpperCase'] != null or data['TitleCase'] != null: unicode_chars[data['CodePoint']] = (data['LowerCase'], data['UpperCase'], data['TitleCase']) if __name__ == '__main__': sys.stdout.write("""/* Unicode Case Conversion * * Copyright (C) 2012-2016 Reece H. Dunn * * This file is part of ucd-tools. * * ucd-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. *
# NOTE(review): this line is a whitespace-collapsed, truncated excerpt of a
# script; it is cut off inside the triple-quoted string passed to
# sys.stdout.write.
# Visible behaviour: reads ucd_rootdir and ucd_version from sys.argv[1] and
# sys.argv[2]; for every 'UnicodeData' record whose LowerCase, UpperCase or
# TitleCase field differs from ucd.CodePoint('0000'), stores
# (LowerCase, UpperCase, TitleCase) in unicode_chars keyed by
# data['CodePoint']; when run as __main__, starts writing a C-style comment
# header to stdout.
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. import os import sys import ucd ucd_rootdir = sys.argv[1] ucd_version = sys.argv[2] unicode_chars = {} null = ucd.CodePoint('0000') for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): if data['LowerCase'] != null or data['UpperCase'] != null or data[ 'TitleCase'] != null: unicode_chars[data['CodePoint']] = (data['LowerCase'], data['UpperCase'], data['TitleCase']) if __name__ == '__main__': sys.stdout.write("""/* Unicode Case Conversion * * Copyright (C) 2012-2018 Reece H. Dunn * * This file is part of ucd-tools. * * ucd-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by