def __init__(self, file_path, is_valid_case):
	self.__name = file_path.stem
	self.__identifier = sanitize(self.__name)
	self.__data = utils.read_all_text_from_file(file_path, logger=True).strip()
	self.condition = ''
	self.requires_unicode = False
	if is_valid_case:
		self.__expected = True
		path_base = str(Path(file_path.parent, file_path.stem))
		yaml_file = Path(path_base + r'.yaml')
		if yaml_file.exists():
			self.__expected = python_to_tomlpp(yaml.load(
				utils.read_all_text_from_file(yaml_file, logger=True),
				Loader=yaml.FullLoader
			))
		else:
			json_file = Path(path_base + r'.json')
			if json_file.exists():
				self.__expected = python_to_tomlpp(json_to_python(json.loads(
					utils.read_all_text_from_file(json_file, logger=True),
				)))
	else:
		self.__expected = False
def __init__(self, file_path, name, is_valid_case):
	self.__name = name
	self.__identifier = sanitize(self.__name)
	self.__group = self.__identifier.strip('_').split('_')[0]
	self.__data = utils.read_all_text_from_file(file_path, logger=True).strip()
	# C++ compilers don't like whitespace after trailing slashes
	# (note: re.S must be passed as flags=, not as the positional count argument)
	self.__data = re.sub(r'\\[ \t]+?\n', '\\\n', self.__data, flags=re.S)
	self.__conditions = []
	if is_valid_case:
		self.__expected = True
		path_base = str(Path(file_path.parent, file_path.stem))
		yaml_file = Path(path_base + r'.yaml')
		if yaml_file.exists():
			self.__expected = python_to_tomlpp(yaml.load(
				utils.read_all_text_from_file(yaml_file, logger=True),
				Loader=yaml.FullLoader
			))
		else:
			json_file = Path(path_base + r'.json')
			if json_file.exists():
				self.__expected = python_to_tomlpp(json_to_python(json.loads(
					utils.read_all_text_from_file(json_file, logger=True),
				)))
	else:
		self.__expected = False
def main():
	hpp_path = Path(utils.entry_script_dir(), '..', 'toml.hpp').resolve()
	hash1 = utils.sha1(utils.read_all_text_from_file(hpp_path, logger=True))
	print(rf'Hash 1: {hash1}')
	utils.run_python_script(r'generate_single_header.py')
	hash2 = utils.sha1(utils.read_all_text_from_file(hpp_path, logger=True))
	print(rf'Hash 2: {hash2}')
	if hash1 != hash2:
		print(
			"toml.hpp wasn't up-to-date!\nRun generate_single_header.py before your commit to prevent this error.",
			file=sys.stderr
		)
		return 1
	print("toml.hpp was up-to-date")
	return 0
def __single_tags_substitute(cls, m):
	if str(m[1]).lower() == 'emoji':
		emoji = str(m[2]).strip().lower()
		if emoji == '':
			return ''
		if cls.__emojis is None:
			file_path = path.join(utils.get_script_folder(), 'emojis.json')
			cls.__emojis = json.loads(utils.read_all_text_from_file(file_path, 'https://api.github.com/emojis'))
			if '__processed' not in cls.__emojis:
				emojis = {}
				for key, uri in cls.__emojis.items():
					m2 = cls.__emoji_uri.fullmatch(uri)
					if m2:
						emojis[key] = [str(m2[1]).upper(), uri]
				aliases = [('sundae', 'ice_cream')]
				for alias, key in aliases:
					emojis[alias] = emojis[key]
				emojis['__processed'] = True
				with open(file_path, 'w', encoding='utf-8', newline='\n') as f:
					f.write(json.dumps(emojis, sort_keys=True, indent=4))
				cls.__emojis = emojis
		if emoji not in cls.__emojis:
			return ''
		return '&#x{}'.format(cls.__emojis[emoji][0])
	else:
		return '<{}{}>'.format(m[1], (' ' + str(m[2]).strip()) if m[2] else '')
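# Illustration only (not part of the original script): the GitHub emoji API (https://api.github.com/emojis)
# maps emoji names to image URIs, and the class-level __emoji_uri regex above is used to pull the hex
# codepoint out of each URI so it can be emitted as an HTML entity. The URI format and the stand-in regex
# below are assumptions made for the sake of a small runnable example.
def _emoji_uri_extraction_sketch():
	import re
	example_emoji_uri = re.compile(r'.+unicode/([0-9a-fA-F]+)[.]png.*')  # hypothetical stand-in for __emoji_uri
	uri = 'https://github.githubassets.com/images/icons/emoji/unicode/1f600.png?v8'  # hypothetical sample
	m = example_emoji_uri.fullmatch(uri)
	return str(m[1]).upper() if m else None  # -> '1F600', which the substitute method wraps as '&#x...'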
def __preprocess(self, match):
	raw_incl = match if isinstance(match, str) else match.group(1)
	incl = raw_incl.strip().lower()
	if incl in self.__processed_includes:
		return ''
	self.__processed_includes.append(incl)
	text = utils.read_all_text_from_file(
		path.join(utils.get_script_folder(), '..', 'include', 'toml++', incl)).strip() + '\n'
	text = text.replace('\r\n', '\n')  # convert windows newlines
	text = self.__re_strip_blocks.sub('', text, 0)  # strip {{ }} blocks
	self.__current_level += 1
	text = self.__re_includes.sub(lambda m: self.__preprocess(m), text, 0)
	self.__current_level -= 1
	if self.__current_level == 1:
		header_text = '↓ ' + raw_incl
		lpad = 20 + ((25 * (self.__header_indent % 4)) - int((len(header_text) + 4) / 2))
		self.__header_indent += 1
		text = '#if 1 {}\n{}\n\n#endif {}\n'.format(
			utils.make_divider(header_text, lpad, line_length=113),
			text,
			utils.make_divider('↑ ' + raw_incl, lpad, line_length=113)
		)
	return '\n\n' + text + '\n\n'  # will get merged later
def main():
	hpp_path = path.join(utils.get_script_folder(), '..', 'toml.hpp')
	hash1 = hashlib.sha1(utils.read_all_text_from_file(hpp_path).encode('utf-8')).hexdigest()
	print("Hash 1: {}".format(hash1))
	utils.run_python_script('generate_single_header.py')
	hash2 = hashlib.sha1(utils.read_all_text_from_file(hpp_path).encode('utf-8')).hexdigest()
	print("Hash 2: {}".format(hash2))
	if hash1 != hash2:
		print(
			"toml.hpp wasn't up-to-date!\nRun generate_single_header.py before your commit to prevent this error.",
			file=sys.stderr
		)
		return 1
	print("toml.hpp was up-to-date")
	return 0
def __preprocess(self, incl):
	if not isinstance(incl, (Path, str)):  # a regex match object
		incl = incl.group(1).strip()
	if isinstance(incl, str):
		incl = Path(incl.strip().replace('\\', r'/'))
	if not incl.is_absolute():
		incl = Path(self.__directory_stack[-1], incl).resolve()
	if incl in self.__processed_includes:
		return ''
	if self.__current_level == 0 and self.__entry_root == '':
		self.__entry_root = str(incl.parent).replace('\\', r'/')
	self.__processed_includes.append(incl)
	self.__directory_stack.append(incl.parent)
	text = utils.read_all_text_from_file(incl, logger=True).strip() + '\n'
	text = text.replace('\r\n', '\n')  # convert windows newlines
	self.__current_level += 1
	text = self.__re_includes.sub(lambda m: self.__preprocess(m), text, 0)
	self.__current_level -= 1
	if self.__current_level == 1:
		header = str(incl).replace('\\', r'/')
		if header.startswith(self.__entry_root):
			header = header[len(self.__entry_root):].strip('/')
		header = utils.make_divider(header, 10, pattern=r'*')
		text = f'{header}\n\n{text}'
	self.__directory_stack.pop()
	return '\n\n' + text + '\n\n'
def main():
	# get unicode character database
	codepoint_list = utils.read_all_text_from_file(
		path.join(utils.get_script_folder(), 'UnicodeData.txt'),
		'https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt'
	)

	# parse the database file into codepoints
	re_codepoint = re.compile(r'^([0-9a-fA-F]+);(.+?);([a-zA-Z]+);')
	current_range_start = -1
	codepoints = []
	parsed_codepoints = 0
	for codepoint_entry in codepoint_list.split('\n'):
		match = re_codepoint.search(codepoint_entry)
		if match is None:
			if current_range_start > -1:
				raise Exception('Previous codepoint indicated the start of a range but the next one was null')
			continue
		codepoint = int('0x{}'.format(match.group(1)), 16)
		if current_range_start > -1:
			for cp in range(current_range_start, codepoint):
				parsed_codepoints += 1
				append_codepoint(codepoints, cp, match.group(3))
			current_range_start = -1
		else:
			if match.group(2).endswith(', First>'):
				current_range_start = codepoint
			else:
				parsed_codepoints += 1
				append_codepoint(codepoints, codepoint, match.group(3))
	print("Extracted {} of {} codepoints from unicode database file.".format(len(codepoints), parsed_codepoints))
	codepoints.sort(key=lambda r: r[0])

	# write the output files
	header_file_path = path.join(utils.get_script_folder(), '..', 'include', 'toml++', 'toml_utf8_generated.h')
	test_file_path = path.join(utils.get_script_folder(), '..', 'tests', 'unicode_generated.cpp')
	print("Writing to {}".format(header_file_path))
	with open(header_file_path, 'w', encoding='utf-8', newline='\n') as header_file:
		if G.generate_tests:
			print("Writing to {}".format(test_file_path))
			with open(test_file_path, 'w', encoding='utf-8', newline='\n') as test_file:
				write_to_files(codepoints, header_file, test_file)
		else:
			write_to_files(codepoints, header_file, None)
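# Context (not part of the original script): UnicodeData.txt encodes large blocks as a pair of
# '<..., First>' / '<..., Last>' entries rather than one line per codepoint, which is what the
# current_range_start bookkeeping in main() expands. A minimal sketch of that convention using the same
# regex as the parser; the two sample lines follow the real database format.
def _unicode_range_entry_sketch():
	import re
	re_codepoint = re.compile(r'^([0-9a-fA-F]+);(.+?);([a-zA-Z]+);')
	samples = (
		'4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;',
		'9FFF;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;',
	)
	# both boundary lines share the general category ('Lo'); main() fills in every codepoint between them
	return [(int(m.group(1), 16), m.group(2), m.group(3)) for m in map(re_codepoint.search, samples)]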
def __init__(self, file_path, name, is_valid_case):
	self.__name = name
	self.__identifier = sanitize(name)
	self.__data = utils.read_all_text_from_file(file_path).strip()
	self.condition = ''
	if is_valid_case:
		self.__expected = True
		path_base = path.splitext(file_path)[0]
		yaml_file = path_base + '.yaml'
		if path.exists(yaml_file):
			self.__expected = python_to_tomlpp(yaml.load(
				utils.read_all_text_from_file(yaml_file),
				Loader=yaml.FullLoader
			))
		else:
			json_file = path_base + '.json'
			if path.exists(json_file):
				self.__expected = python_to_tomlpp(json_to_python(json.loads(
					utils.read_all_text_from_file(json_file),
				)))
	else:
		self.__expected = False
def preprocess(self, match):
	raw_incl = match if isinstance(match, str) else match.group(1)
	incl = raw_incl.strip().lower()
	if incl in self.processed_includes:
		return ''
	self.processed_includes.append(incl)
	text = utils.read_all_text_from_file(
		path.join(utils.get_script_folder(), '..', 'include', 'toml++', incl)).strip() + '\n'
	text = re.sub('\r\n', '\n', text, 0, re.I | re.M)  # convert windows newlines
	text = re.sub(r'//[#!]\s*[{][{].*?//[#!]\s*[}][}]*?\n', '', text, 0, re.I | re.S)  # strip {{ }} blocks
	self.current_level += 1
	text = re.sub(r'^\s*#\s*include\s+"(.+?)"', lambda m: self.preprocess(m), text, 0, re.I | re.M)
	self.current_level -= 1
	if self.current_level == 1:
		header_text = '↓ ' + raw_incl
		lpad = 28 + ((25 * (self.header_indent % 4)) - int((len(header_text) + 4) / 2))
		self.header_indent += 1
		text = '{}\n#if 1\n\n{}\n\n#endif\n{}\n'.format(
			utils.make_divider(header_text, lpad),
			text,
			utils.make_divider('↑ ' + raw_incl, lpad)
		)
	return '\n\n' + text + '\n\n'  # will get merged later
def preprocess_xml(dir):
	global inline_namespaces
	global implementation_headers
	if not inline_namespaces and not implementation_headers:
		return

	write_xml_to_file = lambda x, f: x.write(f, encoding='utf-8', xml_declaration=True)

	inline_namespace_ids = [f'namespace{doxygen_mangle_name(ns)}' for ns in inline_namespaces]

	implementation_header_data = [
		(
			hp,
			path.basename(hp),
			doxygen_mangle_name(path.basename(hp)),
			[(i, path.basename(i), doxygen_mangle_name(path.basename(i))) for i in impl]
		)
		for hp, impl in implementation_headers
	]
	implementation_header_mappings = dict()
	implementation_header_innernamespaces = dict()
	implementation_header_sectiondefs = dict()
	for hdata in implementation_header_data:
		implementation_header_innernamespaces[hdata[2]] = []
		implementation_header_sectiondefs[hdata[2]] = []
		for (ip, ifn, iid) in hdata[3]:
			implementation_header_mappings[iid] = hdata

	if 1:
		extracted_implementation = False
		xml_files = utils.get_all_files(dir, any=('*.xml'))
		for xml_file in xml_files:
			print(f'Pre-processing {xml_file}')
			xml = ET.parse(xml_file)
			root = xml.getroot().find('compounddef')
			if root is None:
				continue
			changed = False

			# namespaces
			if root.get("kind") == "namespace" and inline_namespaces:
				for nsid in inline_namespace_ids:
					if root.get("id") == nsid:
						root.set("inline", "yes")
						changed = True
						break

			# dirs
			if root.get("kind") == "dir" and implementation_headers:
				innerfiles = root.findall('innerfile')
				for innerfile in innerfiles:
					if innerfile.get('refid') in implementation_header_mappings:
						root.remove(innerfile)
						changed = True

			# header files
			if root.get("kind") == "file" and implementation_headers:
				# remove junk not required by m.css
				for tag in ('includes', 'includedby', 'incdepgraph', 'invincdepgraph'):
					tags = root.findall(tag)
					if tags:
						for t in tags:
							root.remove(t)
						changed = True

				# rip the good bits out of implementation headers
				if root.get("id") in implementation_header_mappings:
					hid = implementation_header_mappings[root.get("id")][2]
					innernamespaces = root.findall('innernamespace')
					if innernamespaces:
						implementation_header_innernamespaces[hid] = implementation_header_innernamespaces[hid] + innernamespaces
						extracted_implementation = True
						for tag in innernamespaces:
							root.remove(tag)
							changed = True
					sectiondefs = root.findall('sectiondef')
					if sectiondefs:
						implementation_header_sectiondefs[hid] = implementation_header_sectiondefs[hid] + sectiondefs
						extracted_implementation = True
						for tag in sectiondefs:
							root.remove(tag)
							changed = True

			if changed:
				write_xml_to_file(xml, xml_file)

	# merge extracted implementations
	if extracted_implementation:
		for (hp, hfn, hid, impl) in implementation_header_data:
			xml_file = path.join(dir, f'{hid}.xml')
			print(f'Merging implementation nodes into {xml_file}')
			xml = ET.parse(xml_file)
			root = xml.getroot().find('compounddef')
			changed = False

			innernamespaces = root.findall('innernamespace')
			for new_tag in implementation_header_innernamespaces[hid]:
				matched = False
				for existing_tag in innernamespaces:
					if existing_tag.get('refid') == new_tag.get('refid'):
						matched = True
						break
				if not matched:
					root.append(new_tag)
					innernamespaces.append(new_tag)
					changed = True

			sectiondefs = root.findall('sectiondef')
			for new_section in implementation_header_sectiondefs[hid]:
				matched_section = False
				for existing_section in sectiondefs:
					if existing_section.get('kind') == new_section.get('kind'):
						matched_section = True
						memberdefs = existing_section.findall('memberdef')
						new_memberdefs = new_section.findall('memberdef')
						for new_memberdef in new_memberdefs:
							matched = False
							for existing_memberdef in memberdefs:
								if existing_memberdef.get('id') == new_memberdef.get('id'):
									matched = True
									break
							if not matched:
								new_section.remove(new_memberdef)
								existing_section.append(new_memberdef)
								memberdefs.append(new_memberdef)
								changed = True
						break
				if not matched_section:
					root.append(new_section)
					sectiondefs.append(new_section)
					changed = True

			if changed:
				write_xml_to_file(xml, xml_file)

	# delete the impl header xml files
	if 1 and implementation_headers:
		for hdata in implementation_header_data:
			for (ip, ifn, iid) in hdata[3]:
				xml_file = path.join(dir, f'{iid}.xml')
				if path.exists(xml_file):
					print(f'Deleting {xml_file}')
					os.remove(xml_file)

	# scan through the files and substitute impl header ids and paths as appropriate
	if 1 and implementation_headers:
		xml_files = utils.get_all_files(dir, any=('*.xml'))
		for xml_file in xml_files:
			print(f'Re-linking implementation headers in {xml_file}')
			xml = utils.read_all_text_from_file(xml_file)
			for (hp, hfn, hid, impl) in implementation_header_data:
				for (ip, ifn, iid) in impl:
					#xml = xml.replace(f'refid="{iid}"', f'refid="{hid}"')
					xml = xml.replace(f'compoundref="{iid}"', f'compoundref="{hid}"')
					xml = xml.replace(ip, hp)
			with open(xml_file, 'w', encoding='utf-8', newline='\n') as f:
				f.write(xml)
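# For reference (not part of the original script): preprocess_xml() consumes the two module-level globals
# declared at its top. A hypothetical configuration sketch showing the shapes it expects —
# inline_namespaces is a flat list of namespace names, and implementation_headers pairs a public header
# with the implementation headers whose documentation gets folded into it. The paths below are invented
# for illustration only.
def _example_doxygen_config_sketch():
	return (
		['toml::literals'],                                                   # inline_namespaces
		[('toml++/toml.h', ['toml++/toml_node.h', 'toml++/toml_table.h'])],  # implementation_headers
	)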
def main():
	# establish local directories
	root_dir = utils.entry_script_dir().parent
	include_dir = Path(root_dir, 'include', 'toml++')

	# preprocess header(s)
	toml_h = str(Preprocessor(Path(include_dir, 'toml.h')))

	# strip various things:
	if 1:
		for i in range(3):
			# trailing whitespace
			toml_h = re.sub('([^ \t])[ \t]+\n', r'\1\n', toml_h)
			# explicit 'strip this' blocks
			toml_h = re.sub(r'(?:\n[ \t]*)?//[#!][ \t]*[{][{].*?//[#!][ \t]*[}][}].*?\n', '\n', toml_h, flags=re.S)
			# spdx license identifiers
			toml_h = re.sub(r'^\s*//\s*SPDX-License-Identifier:.+?$', '', toml_h, 0, re.I | re.M)
			# double blank lines
			toml_h = re.sub('\n(?:[ \t]*\n[ \t]*)+\n', '\n\n', toml_h)
			# magic comments
			blank_line = r'(?:[ \t]*\n)'
			comment_line = r'(?:[ \t]*//(?:[/#!<]| ?(?:---|===|\^\^\^|vvv))[^\n]*\n)'
			toml_h = re.sub(rf'\n{comment_line}{blank_line}+{comment_line}', '\n', toml_h)
			toml_h = re.sub(rf'([{{,])\s*\n(?:{comment_line}|{blank_line})+', r'\1\n', toml_h)
			toml_h = re.sub(rf'{comment_line}+', '\n', toml_h)
			# weird spacing edge case between } and pp directives
			toml_h = re.sub('\n[}]\n#', r'\n}\n\n#', toml_h, flags=re.S)
			# enable warnings -> disable warnings
			toml_h = re.sub('(TOML_ENABLE_WARNINGS;)\n[ \t\n]*\n(TOML_DISABLE_WARNINGS;)', r'', toml_h)
			# blank lines between consecutive TOML_XXXXX_WARNINGS statements
			toml_h = re.sub('(TOML_[A-Z_]+?_WARNINGS;)\n[ \t\n]*\n(TOML_[A-Z_]+?_WARNINGS;)', r'\1\n\2', toml_h)
			# blank lines between consecutive #includes
			toml_h = re.sub('[#]\s*include\s*<(.+?)>\n[ \t\n]*\n[#]\s*include\s*<(.+?)>', r'#include <\1>\n#include <\2>', toml_h)
			# blank lines following opening brackets or a comma
			toml_h = re.sub(r'([^@][({,])\n\n', r'\1\n', toml_h)
			# blank lines preceding closing brackets
			toml_h = re.sub(r'\n\n([ \t]*[})])', r'\n\1', toml_h)
		# ensure only one trailing newline
		toml_h = toml_h.strip() + '\n'

	# change TOML_LIB_SINGLE_HEADER to 1
	toml_h = re.sub('#\s*define\s+TOML_LIB_SINGLE_HEADER\s+[0-9]+', '#define TOML_LIB_SINGLE_HEADER 1', toml_h, 0, re.I)

	# read version number
	version_h = utils.read_all_text_from_file(Path(include_dir, 'impl/version.h'), logger=True)
	match = re.search(
		r'#\s*define\s+TOML_LIB_MAJOR\s+([0-9]+)[^0-9].*'
		+ r'#\s*define\s+TOML_LIB_MINOR\s+([0-9]+)[^0-9].*'
		+ r'#\s*define\s+TOML_LIB_PATCH\s+([0-9]+)[^0-9]',
		version_h, re.I | re.S
	)
	if match is None:
		raise Exception("could not find TOML_LIB_MAJOR, TOML_LIB_MINOR or TOML_LIB_PATCH in impl/version.h")
	version = rf'{int(match[1])}.{int(match[2])}.{int(match[3])}'
	print(rf'Library version: {version}')

	# build the preamble (license etc)
	preamble = []
	preamble.append(rf'''
// toml++ v{version}
// https://github.com/marzer/tomlplusplus
// SPDX-License-Identifier: MIT''')
	preamble.append(r'''
// - THIS FILE WAS ASSEMBLED FROM MULTIPLE HEADER FILES BY A SCRIPT - PLEASE DON'T EDIT IT DIRECTLY -
//
// If you wish to submit a contribution to toml++, hooray and thanks! Before you crack on, please be aware that this
// file was assembled from a number of smaller files by a python script, and code contributions should not be made
// against it directly. You should instead make your changes in the relevant source file(s). The file names of the files
// that contributed to this header can be found at the beginnings and ends of the corresponding sections of this file.''')
	preamble.append(r'''
// TOML Language Specifications:
// latest:      https://github.com/toml-lang/toml/blob/master/README.md
// v1.0.0:      https://toml.io/en/v1.0.0
// v0.5.0:      https://toml.io/en/v0.5.0
// changelog:   https://github.com/toml-lang/toml/blob/master/CHANGELOG.md''')
	preamble.append(utils.read_all_text_from_file(Path(utils.entry_script_dir(), '..', 'LICENSE').resolve(), logger=True))

	# write the output
	with StringIO(newline='\n') as output:
		# build in a string buffer
		write = lambda txt, end='\n': print(txt, file=output, end=end)
		if len(preamble) > 0:
			write(utils.make_divider())
		for pre in preamble:
			write('//')
			for line in pre.strip().splitlines():
				if len(line) == 0:
					write('//')
					continue
				if not line.startswith('//'):
					write('// ', end='')
				write(line)
			write('//')
			write(utils.make_divider())
		write(toml_h)
		write('')

		output_str = output.getvalue().strip()

		# analyze the output to find any potentially missing #undefs
		if 1:
			re_define = re.compile(r'^\s*#\s*define\s+([a-zA-Z0-9_]+)(?:$|\s|\()')
			re_undef = re.compile(r'^\s*#\s*undef\s+([a-zA-Z0-9_]+)(?:$|\s|//)')
			defines = dict()
			for output_line in output_str.splitlines():
				defined = True
				m = re_define.match(output_line)
				if not m:
					defined = False
					m = re_undef.match(output_line)
				if m:
					defines[m.group(1)] = defined
			ignore_list = (  # macros that are meant to stay public (user configs etc)
				r'INCLUDE_TOMLPLUSPLUS_H',
				r'POXY_IMPLEMENTATION_DETAIL',
				r'TOML_ALL_INLINE',
				r'TOML_API',
				r'TOML_CONCAT',
				r'TOML_CONCAT_1',
				r'TOML_CONFIG_HEADER',
				r'TOML_ENABLE_FORMATTERS',
				r'TOML_ENABLE_PARSER',
				r'TOML_ENABLE_SIMD',
				r'TOML_ENABLE_UNRELEASED_FEATURES',
				r'TOML_ENABLE_WINDOWS_COMPAT',
				r'TOML_EXCEPTIONS',
				r'TOML_EXTERN_TEMPLATES',
				r'TOML_HEADER_ONLY',
				r'TOML_LANG_MAJOR',
				r'TOML_LANG_MINOR',
				r'TOML_LANG_PATCH',
				r'TOML_LIB_MAJOR',
				r'TOML_LIB_MINOR',
				r'TOML_LIB_PATCH',
				r'TOML_LIB_SINGLE_HEADER',
				r'TOML_MAX_NESTED_VALUES',
				r'TOML_NAMESPACE_END',
				r'TOML_NAMESPACE_START',
				r'TOML_OPTIONAL_TYPE',
				r'TOML_SMALL_FLOAT_TYPE',
				r'TOML_SMALL_INT_TYPE',
				r'TOML_UNDEF_MACROS',
				r'TOMLPLUSPLUS_H',
			)
			set_defines = []
			for define, currently_set in defines.items():
				if currently_set and define not in ignore_list:
					set_defines.append(define)
			if len(set_defines) > 0:
				set_defines.sort()
				print(f"Potentially missing #undefs:")
				for define in set_defines:
					print(f"\t#undef {define}")

		# write the output file
		output_file_path = Path(utils.entry_script_dir(), '..', 'toml.hpp').resolve()
		print("Writing to {}".format(output_file_path))
		with open(output_file_path, 'w', encoding='utf-8', newline='\n') as output_file:
			print(output_str, file=output_file)
def main():
	# preprocess header(s)
	source_text = str(Preprocessor('toml.h'))

	# strip various things:
	# 'pragma once'
	source_text = re.sub(r'^\s*#\s*pragma\s+once\s*$', '', source_text, 0, re.I | re.M)
	# clang-format directives
	source_text = re.sub(r'^\s*//\s*clang-format\s+.+?$', '', source_text, 0, re.I | re.M)
	# spdx license identifiers
	source_text = re.sub(r'^\s*//\s*SPDX-License-Identifier:.+?$', '', source_text, 0, re.I | re.M)
	# 'magic' comment blocks (incl. doxygen)
	source_text = re.sub('(?:(?:\n|^)[ \t]*//[/#!<]+[^\n]*)+\n', '\n', source_text, 0, re.I | re.M)
	# 'magic' comments (incl. doxygen)
	source_text = re.sub('(?://[/#!<].*?)\n', '\n', source_text, 0, re.I | re.M)
	# remove trailing whitespace
	source_text = re.sub('([^ \t])[ \t]+\n', '\\1\n', source_text, 0, re.I | re.M)
	# bookended namespace blocks
	source_text = re.sub('}\n+TOML_NAMESPACE_END\n+TOML_NAMESPACE_START\n+{\n+', '\n', source_text, 0, re.I | re.M)
	source_text = re.sub('}\n+TOML_IMPL_NAMESPACE_END\n+TOML_IMPL_NAMESPACE_START\n+{\n+', '\n', source_text, 0, re.I | re.M)
	# blank lines before some preprocessor directives
	#source_text = re.sub('\n+\n(\s*)#\s*(elif|else|endif)(.*?)\n', '\n\\1#\\2\\3\n', source_text, 0, re.I | re.M)
	# blank lines after some preprocessor directives
	#source_text = re.sub('#\s*(if|ifn?def|elif|else)(.*?)\n\n+', '#\\1\\2\n', source_text, 0, re.I | re.M)
	# blank lines after opening braces
	source_text = re.sub('[{]\s*\n\s*\n+', '{\n', source_text, 0, re.I | re.M)
	# double newlines
	source_text = re.sub('\n(?:[ \t]*\n[ \t]*)+\n', '\n\n', source_text, 0, re.I | re.M)
	# source_text = re.sub(  # blank lines between various preprocessor directives
	# 	'[#](endif(?:\s*//[^\n]*)?)\n{2,}[#](ifn?(?:def)?|define)',
	# 	'#\\1\n#\\2',
	# 	source_text, 0, re.I | re.M
	# )
	return_type_pattern \
		= r'(?:' \
		+ r'(?:\[\[nodiscard\]\]\s*)?' \
		+ r'(?:(?:friend|explicit|virtual|inline|const|operator)\s+)*' \
		+ r'(?:' \
			+ r'bool|int64_t|(?:const_)?iterator|double|void' \
			+ r'|node(?:_(?:view|of)<.+?>|)?|table|array|value(?:<.+?>)?' \
			+ r'|T|U|parse_(?:error|result)' \
		+ r')' \
		+ r'(?:\s*[&*]+)?' \
		+ r'(?:\s*[(]\s*[)])?' \
		+ r'\s+' \
		+ r')'
	blank_lines_between_returns_pattern = '({}[^\n]+)\n\n([ \t]*{})'.format(return_type_pattern, return_type_pattern)
	for i in range(0, 5):  # remove blank lines between simple one-liner definitions
		source_text = re.sub('(using .+?;)\n\n([ \t]*using)', '\\1\n\\2', source_text, 0, re.I | re.M)
		source_text = re.sub(
			'([a-zA-Z_][a-zA-Z0-9_]*[ \t]+[a-zA-Z_][a-zA-Z0-9_]*[ \t]*;)'
			+ '\n\n([ \t]*[a-zA-Z_][a-zA-Z0-9_]*[ \t]+[a-zA-Z_][a-zA-Z0-9_]*[ \t]*;)',
			'\\1\n\\2', source_text, 0, re.I | re.M)
		source_text = re.sub(blank_lines_between_returns_pattern, '\\1\n\\2', source_text, 0, re.I | re.M)
	source_text = source_text.strip() + '\n'

	# change TOML_LIB_SINGLE_HEADER to 1
	source_text = re.sub('#\s*define\s+TOML_LIB_SINGLE_HEADER\s+[0-9]+', '#define TOML_LIB_SINGLE_HEADER 1', source_text, 0, re.I)

	# extract library version
	library_version = {'major': 0, 'minor': 0, 'patch': 0}
	match = re.search(r'^\s*#\s*define\s+TOML_LIB_MAJOR\s+([0-9]+)\s*$', source_text, re.I | re.M)
	if match is not None:
		library_version['major'] = match.group(1)
	match = re.search(r'^\s*#\s*define\s+TOML_LIB_MINOR\s+([0-9]+)\s*$', source_text, re.I | re.M)
	if match is not None:
		library_version['minor'] = match.group(1)
	match = re.search(r'^\s*#\s*define\s+TOML_LIB_(?:REVISION|PATCH)\s+([0-9]+)\s*$', source_text, re.I | re.M)
	if match is not None:
		library_version['patch'] = match.group(1)

	# build the preamble (license etc)
	preamble = []
	preamble.append('''
// toml++ v{major}.{minor}.{patch}
// https://github.com/marzer/tomlplusplus
// SPDX-License-Identifier: MIT'''.format(**library_version))
	preamble.append('''
// - THIS FILE WAS ASSEMBLED FROM MULTIPLE HEADER FILES BY A SCRIPT - PLEASE DON'T EDIT IT DIRECTLY -
//
// If you wish to submit a contribution to toml++, hooray and thanks! Before you crack on, please be aware that this
// file was assembled from a number of smaller files by a python script, and code contributions should not be made
// against it directly. You should instead make your changes in the relevant source file(s). The file names of the files
// that contributed to this header can be found at the beginnings and ends of the corresponding sections of this file.''')
	preamble.append('''
// TOML Language Specifications:
// latest:      https://github.com/toml-lang/toml/blob/master/README.md
// v1.0.0-rc.2: https://toml.io/en/v1.0.0-rc.2
// v1.0.0-rc.1: https://toml.io/en/v1.0.0-rc.1
// v0.5.0:      https://toml.io/en/v0.5.0
// changelog:   https://github.com/toml-lang/toml/blob/master/CHANGELOG.md''')
	preamble.append(utils.read_all_text_from_file(path.join(utils.get_script_folder(), '..', 'LICENSE')))

	# write the output
	with StringIO(newline='\n') as output:
		# build in a string buffer
		write = lambda txt, end='\n': print(txt, file=output, end=end)
		if len(preamble) > 0:
			write(utils.make_divider())
		for pre in preamble:
			write('//')
			for line in pre.strip().splitlines():
				if len(line) == 0:
					write('//')
					continue
				if not line.startswith('//'):
					write('// ', end='')
				write(line)
			write('//')
			write(utils.make_divider())
		write(source_text)
		write('')

		output_str = output.getvalue().strip()

		# analyze the output to find any potentially missing #undefs
		re_define = re.compile(r'^\s*#\s*define\s+([a-zA-Z0-9_]+)(?:$|\s|\()')
		re_undef = re.compile(r'^\s*#\s*undef\s+([a-zA-Z0-9_]+)(?:$|\s|//)')
		defines = dict()
		for output_line in output_str.splitlines():
			defined = True
			m = re_define.match(output_line)
			if not m:
				defined = False
				m = re_undef.match(output_line)
			if m:
				defines[m.group(1)] = defined
		ignore_list = (  # macros that are meant to stay public (user configs etc)
			'INCLUDE_TOMLPLUSPLUS_H',
			'TOML_API',
			'TOML_UNRELEASED_FEATURES',
			'TOML_LARGE_FILES',
			'TOML_PARSER',
			'TOML_WINDOWS_COMPAT',
			'TOML_EXCEPTIONS',
			'TOML_LIB_SINGLE_HEADER',
			'TOML_LIB_MAJOR',
			'TOML_LIB_MINOR',
			'TOML_LIB_PATCH',
			'TOML_LANG_MAJOR',
			'TOML_LANG_MINOR',
			'TOML_LANG_PATCH',
			'TOML_UNDEF_MACROS',
			'TOML_HEADER_ONLY',
			'TOML_ALL_INLINE'
		)
		set_defines = []
		for define, currently_set in defines.items():
			if currently_set and define not in ignore_list:
				set_defines.append(define)
		if len(set_defines) > 0:
			set_defines.sort()
			print(f"Potentially missing #undefs:")
			for define in set_defines:
				print(f"\t#undef {define}")

		# write the output file
		output_file_path = path.join(utils.get_script_folder(), '..', 'toml.hpp')
		print("Writing to {}".format(output_file_path))
		with open(output_file_path, 'w', encoding='utf-8', newline='\n') as output_file:
			print(output_str, file=output_file)
def main():
	# preprocess header(s)
	source_text = Preprocessor()('toml.h')

	# strip various things:
	# 'pragma once'
	source_text = re.sub(r'^\s*#\s*pragma\s+once\s*$', '', source_text, 0, re.I | re.M)
	# clang-format directives
	source_text = re.sub(r'^\s*//\s*clang-format\s+.+?$', '', source_text, 0, re.I | re.M)
	# spdx license identifiers
	source_text = re.sub(r'^\s*//\s*SPDX-License-Identifier:.+?$', '', source_text, 0, re.I | re.M)
	# remove 'magic' comment blocks
	source_text = re.sub('(?:(?:\n|^)[ \t]*//[/#!<]+[^\n]*)+\n', '\n', source_text, 0, re.I | re.M)
	# remove 'magic' comments
	source_text = re.sub('(?://[/#!<].*?)\n', '\n', source_text, 0, re.I | re.M)
	# remove trailing whitespace
	source_text = re.sub('([^ \t])[ \t]+\n', '\\1\n', source_text, 0, re.I | re.M)
	# remove double newlines
	source_text = re.sub('\n(?:[ \t]*\n[ \t]*)+\n', '\n\n', source_text, 0, re.I | re.M)
	# source_text = re.sub(  # blank lines between various preprocessor directives
	# 	'[#](endif(?:\s*//[^\n]*)?)\n{2,}[#](ifn?(?:def)?|define)',
	# 	'#\\1\n#\\2',
	# 	source_text, 0, re.I | re.M
	# )
	return_type_pattern \
		= r'(?:' \
		+ r'(?:\[\[nodiscard\]\]\s*)?' \
		+ r'(?:(?:friend|explicit|virtual|inline|const|operator)\s+)*' \
		+ r'(?:' \
			+ r'bool|int64_t|(?:const_)?iterator|double|void' \
			+ r'|node(?:_(?:view|of)<.+?>|)?|table|array|value(?:<.+?>)?' \
			+ r'|T|U|parse_(?:error|result)' \
		+ r')' \
		+ r'(?:\s*[&*]+)?' \
		+ r'(?:\s*[(]\s*[)])?' \
		+ r'\s+' \
		+ r')'
	blank_lines_between_returns_pattern = '({}[^\n]+)\n\n([ \t]*{})'.format(return_type_pattern, return_type_pattern)
	for i in range(0, 5):  # remove blank lines between simple one-liner definitions
		source_text = re.sub('(using .+?;)\n\n([ \t]*using)', '\\1\n\\2', source_text, 0, re.I | re.M)
		source_text = re.sub(
			'([a-zA-Z_][a-zA-Z0-9_]*[ \t]+[a-zA-Z_][a-zA-Z0-9_]*[ \t]*;)'
			+ '\n\n([ \t]*[a-zA-Z_][a-zA-Z0-9_]*[ \t]+[a-zA-Z_][a-zA-Z0-9_]*[ \t]*;)',
			'\\1\n\\2', source_text, 0, re.I | re.M)
		source_text = re.sub(blank_lines_between_returns_pattern, '\\1\n\\2', source_text, 0, re.I | re.M)
	source_text = source_text.strip() + '\n'

	# extract library version
	library_version = {'major': 0, 'minor': 0, 'patch': 0}
	match = re.search(r'^\s*#\s*define\s+TOML_LIB_MAJOR\s+([0-9]+)\s*$', source_text, re.I | re.M)
	if match is not None:
		library_version['major'] = match.group(1)
	match = re.search(r'^\s*#\s*define\s+TOML_LIB_MINOR\s+([0-9]+)\s*$', source_text, re.I | re.M)
	if match is not None:
		library_version['minor'] = match.group(1)
	match = re.search(r'^\s*#\s*define\s+TOML_LIB_(?:REVISION|PATCH)\s+([0-9]+)\s*$', source_text, re.I | re.M)
	if match is not None:
		library_version['patch'] = match.group(1)

	# build the preamble (license etc)
	preamble = []
	preamble.append('''
toml++ v{major}.{minor}.{patch}
https://github.com/marzer/tomlplusplus
SPDX-License-Identifier: MIT'''.format(**library_version))
	preamble.append('''
- THIS FILE WAS ASSEMBLED FROM MULTIPLE HEADER FILES BY A SCRIPT - PLEASE DON'T EDIT IT DIRECTLY -

If you wish to submit a contribution to toml++, hooray and thanks! Before you crack on, please be aware that this
file was assembled from a number of smaller files by a python script, and code contributions should not be made
against it directly. You should instead make your changes in the relevant source file(s). The file names of the files
that contributed to this header can be found at the beginnings and ends of the corresponding sections of this file.''')
	preamble.append('''
TOML language specifications:
Latest:      https://github.com/toml-lang/toml/blob/master/README.md
v1.0.0-rc.1: https://toml.io/en/v1.0.0-rc.1
v0.5.0:      https://toml.io/en/v0.5.0''')
	preamble.append(utils.read_all_text_from_file(path.join(utils.get_script_folder(), '..', 'LICENSE')))

	# write the output file
	output_file_path = path.join(utils.get_script_folder(), '..', 'toml.hpp')
	print("Writing to {}".format(output_file_path))
	with open(output_file_path, 'w', encoding='utf-8', newline='\n') as output_file:
		if len(preamble) > 0:
			print(utils.make_divider(), file=output_file)
		for pre in preamble:
			print('//', file=output_file)
			for line in pre.strip().splitlines():
				print('//', file=output_file, end='')
				if len(line) > 0:
					print(' ', file=output_file, end='')
					print(line, file=output_file)
				else:
					print('\n', file=output_file, end='')
			print('//', file=output_file)
			print(utils.make_divider(), file=output_file)
		print('''// clang-format off
#ifndef INCLUDE_TOMLPLUSPLUS_H
#define INCLUDE_TOMLPLUSPLUS_H
#define TOML_LIB_SINGLE_HEADER 1
''', file=output_file)
		print(source_text, file=output_file)
		print('''
#endif // INCLUDE_TOMLPLUSPLUS_H
// clang-format on''', file=output_file)
def __init__(self, file_path, name, is_valid_case):
	self.__name = name
	self.__identifier = sanitize(self.__name)
	self.__group = self.__identifier.strip('_').split('_')[0]

	# read file
	self.__raw = True
	self.__bytes = False
	with open(file_path, "rb") as f:
		self.__source = f.read()

	# if we find a utf-16 or utf-32 BOM, treat the file as bytes
	if len(self.__source) >= 4:
		prefix = self.__source[:4]
		if prefix == b'\x00\x00\xFE\xFF' or prefix == b'\xFF\xFE\x00\x00':
			self.__bytes = True
	if len(self.__source) >= 2:
		prefix = self.__source[:2]
		if prefix == b'\xFF\xFE' or prefix == b'\xFE\xFF':
			self.__bytes = True

	# if we find a utf-8 BOM, treat it as a string but don't use a raw string literal
	if not self.__bytes and len(self.__source) >= 3:
		prefix = self.__source[:3]
		if prefix == b'\xEF\xBB\xBF':
			self.__raw = False

	# if we're not treating it as bytes, decode the bytes into a utf-8 string
	if not self.__bytes:
		try:
			self.__source = str(self.__source, encoding='utf-8')
			# disable raw literals if the string contains some things that should be escaped
			for c in self.__source:
				if is_problematic_control_char(c):
					self.__raw = False
					break
			# disable raw literals if the string has trailing backslashes followed by whitespace on the same line
			# (GCC doesn't like it and generates some noisy warnings)
			if self.__raw and re.search(r'\\[ \t]+?\n', self.__source, re.S):
				self.__raw = False
		except UnicodeDecodeError:
			self.__bytes = True

	# strip off trailing newlines for non-byte strings (they're just noise)
	if not self.__bytes:
		while self.__source.endswith('\r\n'):
			self.__source = self.__source[:-2]
		self.__source = self.__source.rstrip('\n')

	# parse preprocessor conditions
	self.__conditions = []

	if is_valid_case:
		self.__expected = True
		path_base = str(Path(file_path.parent, file_path.stem))
		yaml_file = Path(path_base + r'.yaml')
		if yaml_file.exists():
			self.__expected = python_to_tomlpp(yaml.load(
				utils.read_all_text_from_file(yaml_file, logger=True),
				Loader=yaml.FullLoader
			))
		else:
			json_file = Path(path_base + r'.json')
			if json_file.exists():
				self.__expected = python_to_tomlpp(json_to_python(json.loads(
					utils.read_all_text_from_file(json_file, logger=True),
				)))
	else:
		self.__expected = False
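# Not part of the original script: is_problematic_control_char() is referenced above but defined elsewhere
# in the generator. A minimal sketch of one plausible implementation, assuming 'problematic' means control
# characters (other than tab/newline/carriage return, which are handled separately) that shouldn't be
# embedded verbatim in the generated C++ test source.
def is_problematic_control_char_sketch(c):
	return c not in ('\t', '\n', '\r') and (ord(c) < 0x20 or ord(c) == 0x7F)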