def get_option_db(InfoDb=None):
    """Build a map: command-line-option string --> its default value.

    InfoDb -- dictionary in the format of SETUP_INFO; if None, the global
              SETUP_INFO is used (backward compatible default).

    Entries whose name starts with "XX_" are deprecated and skipped; entries
    whose value is not a list are derived setup options (no command line
    representation) and are skipped as well.

    RETURNS: dict { option-string: default-value }
    """
    if InfoDb is None: InfoDb = SETUP_INFO
    result = {}
    for name, value in InfoDb.items():
        if name.startswith("XX_"):
            continue  # DEPRECATED
        elif not isinstance(value, list):
            continue  # derived setup option
        # A regular entry is a pair: ([option strings], default value).
        option_list, default = value
        result.update((option, default) for option in option_list)
    return result
def __interpret_command_line(argv): command_line = GetPot(argv) if command_line.search("--version", "-v"): print "Quex - Fast Universal Lexical Analyzer Generator" print "Version " + QUEX_VERSION print "(C) 2005-2012 Frank-Rene Schaefer" print "ABSOLUTELY NO WARRANTY" return None if command_line.search("--help", "-h"): print "Quex - Fast Universal Lexical Analyzer Generator" print "Please, consult the quex documentation for further help, or" print "visit http://quex.org" print "(C) 2005-2012 Frank-Rene Schaefer" print "ABSOLUTELY NO WARRANTY" return None for variable_name, info in SETUP_INFO.items(): # Some parameters are not set on the command line. Their entry is not associated # with a description list. if type(info) != list: continue if info[1] == SetupParTypes.FLAG: setup.__dict__[variable_name] = command_line.search(info[0]) elif info[1] == SetupParTypes.NEGATED_FLAG: setup.__dict__[variable_name] = not command_line.search(info[0]) elif info[1] == SetupParTypes.LIST: if not command_line.search(info[0]): setup.__dict__[variable_name] = [] else: the_list = command_line.nominus_followers(info[0]) if len(the_list) == 0: error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1]) if setup.__dict__.has_key(variable_name): for element in the_list: if element not in setup.__dict__[variable_name]: setup.__dict__[variable_name].extend(the_list) else: setup.__dict__[variable_name] = list(set(the_list)) elif command_line.search(info[0]): if not command_line.search(info[0]): setup.__dict__[variable_name] = info[1] else: value = command_line.follow("--EMPTY--", info[0]) if value == "--EMPTY--": error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1]) setup.__dict__[variable_name] = value return command_line
def argv_ufo_detections(Cl):
    """Detects unidentified command line options ('ufo'-s).

    Collects every option string registered in SETUP_INFO and reports any
    command line option that is not among them via 'error.log()'.
    """
    known_option_list = []
    for info in SETUP_INFO.itervalues():
        if type(info) != list: continue
        known_option_list.extend(info[0])

    ufo_list = Cl.unidentified_options(known_option_list)
    if not ufo_list: return

    # FIX: format each unidentified option on its own line. The previous
    # '"".join("%s\n" % ufo_list)' formatted the whole list object once,
    # printing its repr instead of one option per line.
    option_str = "".join("%s\n" % ufo for ufo in ufo_list)
    error.log("Following command line options are unknown to current version of quex:\n" \
              + option_str,
              SuppressCode=NotificationDB.error_ufo_on_command_line_f)
def argv_interpret(argv):
    """RETURNS: QueryF -- True, if quex is run in query mode.
                          False, if it is run in code generation mode.
                Setup  -- information about the command line.
    """
    cl      = GetPot(argv, SectionsEnabledF=False)
    query_f = None
    cl.disable_loop()

    for name, entry in SETUP_INFO.items():
        # Entries without a description list cannot appear on the command line.
        if type(entry) != list:
            continue
        option_list = entry[0]
        default     = entry[1]

        cl.reset_cursor()
        if not cl.search(option_list):
            continue

        query_f = argv_is_query_option(cl, option_list, name, query_f)

        # NOTE: the order of these checks matters -- the SetupParTypes
        # comparisons must precede the plain integer check.
        if default == SetupParTypes.FLAG:
            value = argv_catch_flag(cl, option_list, None)
        elif default == SetupParTypes.NEGATED_FLAG:
            value = argv_catch_negated_flag(cl, option_list, None)
        elif default == SetupParTypes.INT_LIST:
            value = argv_catch_int_list(cl, name, option_list, [])
        elif default == SetupParTypes.LIST:
            value = argv_catch_list(cl, option_list, [])
        elif isinstance(default, (int, long)):
            value = argv_catch_int(cl, option_list, default)
        else:
            value = argv_catch_string(cl, option_list, default)

        setup.set(name, default, value)

    # Handle unidentified command line options.
    argv_ufo_detections(cl)

    return query_f, cl
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- global setup object carrying all parameter values.
    command_line -- GetPot wrapper around the command line.
    argv         -- raw argument list (used to count option occurrences).

    Every detected inconsistency is reported via 'error.log()'.
    """
    # '--show' for extern token ids requires that an extern token id file is given.
    if setup.extern_token_id_file_show_f and not setup.extern_token_id_file:
        error.log("Option '%s' cannot be used without\n" % _example_flag("extern_token_id_file_show_f")
                  + "option '%s'." % _example_flag("extern_token_id_file"))

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error.log(
            "Character display must be either 'hex' or 'utf8'.\nFound: '%s'"
            % setup.character_display)

    # ensure that options are not specified twice
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        # List-type options may legitimately appear repeatedly; all others only once.
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Depraceted' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options  = SETUP_INFO[name][0]
        comment               = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort ignoring leading dashes, so short and long forms group together.
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # Code unit (lexatom) size must be one of the supported widths; -1 = undetermined.
    if setup.lexatom.size_in_byte not in [-1, 1, 2, 4]:
        example_flag = SETUP_INFO["__buffer_lexatom_size_in_byte"][0][0]
        error.log("The setting of '%s' can only be\n" % example_flag
                  + "1, 2, or 4 (found %s)." % repr(setup.lexatom.size_in_byte))

    # Manually written token class requires token class name to be specified
    if setup.extern_token_class_file:
        if not setup.token_class:
            error.log(
                "The use of a manually written token class requires that the name of the class\n"
                "is specified on the command line via the '--token-class' option."
            )

    if setup.converter_only_f:
        # Converter-only mode: lexatom type and encoding must be given explicitly,
        # since no analyzer specification is available to derive them from.
        if not setup.lexatom.type:
            error.log(
                "Lexatom type must be specific for converter generation.")
        if not _find_flag("buffer_encoding_name", argv):
            error.log(
                "Lexeme-converter-only-mode requires explicit definition of encoding.\n"
                "Example: '%s unicode'." % _example_flag("buffer_encoding_name"))
        if not _find_flag("__buffer_lexatom_type", argv):
            error.log(
                "Lexeme-converter-only-mode requires explicit definition of the code unit type.\n"
                "Example: '%s uint8_t'." % _example_flag("__buffer_lexatom_type"))

    # Check that names are valid identifiers
    if setup.token_id_prefix_plain:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    __check_file_name(setup, "extern_token_class_file", "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file", "file containing user derived lexer class")
    __check_file_name(setup, "extern_token_id_file", "file containing user token ids", 0,
                      CommandLineOption=SETUP_INFO["extern_token_id_file"])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Internal engine character encoding
    if setup.buffer_encoding.name not in ("utf32", "unicode"):
        if not setup.buffer_encoding_file:
            error.verify_word_in_list(
                setup.buffer_encoding_name,
                codec_db.get_supported_codec_list() + ["utf8", "utf16", "utf32"],
                "Codec '%s' is not supported." % setup.buffer_encoding.name)

    # NOT: __check_codec_vs_buffer_lexatom_size_in_byte("utf8", 1)
    # BECAUSE: Code unit size is one. No type has a size of less than one byte!
    __check_codec_vs_buffer_lexatom_size_in_byte(setup, "utf16", 2)
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- global setup object carrying all parameter values.
    command_line -- GetPot wrapper around the command line.
    argv         -- raw argument list (used to count option occurrences).

    Every detected inconsistency is reported via 'error_msg()'.
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Check, if the output directory exists
        if os.access(setup.output_directory, os.F_OK) == False:
            error_msg("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error_msg("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # if the mode is 'plotting', then check wether a graphic format is speicified
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    if setup.plot_character_display not in ["hex", "utf8"]:
        error_msg("Plot character display must be either 'hex' or 'utf8'.\nFound: '%s'" % setup.plot_character_display)

    # ensure that options are not specified twice
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Depraceted' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options  = SETUP_INFO[name][0]
        comment               = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] != None: options.extend(info[0])
    # Sort ignoring leading dashes, so short and long forms group together.
    options.sort(lambda a,b: cmp(a.replace("-",""), b.replace("-","")))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        error_msg("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # Buffer element width must be a supported size; -1 = undetermined.
    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error_msg("The setting of '--buffer-element-size' (or '-b') can only be\n"
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search("--token-class", "--tc") == False:
        error_msg("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error_msg("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        if setup.token_queue_size == setup.token_queue_safety_border: cmp_str = "equal to"
        else:                                                         cmp_str = "less than"
        # NOTE(review): message reads "size is %i is %s ..." -- the doubled
        # 'is' looks like a wording slip; verify the intended text.
        error_msg("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) + \
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name",   "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    __check_file_name(setup, "token_class_file",                  "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",       "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file",  "file containing user token ids")
    __check_file_name(setup, "input_mode_files",                  "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f:             converter_n += 1
    if setup.converter_icu_f:               converter_n += 1
    if setup.converter_user_new_func != "": converter_n += 1
    if converter_n > 1:
        error_msg("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n" + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec != "":
        error_msg("An engine that is to be generated for a specific codec cannot rely\n" + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and setup.converter_ucs_coding_name == "" \
       and converter_n != 0:
        tc = setup.buffer_element_type
        error_msg("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" + \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option '--converter-ucs-coding-name' or '--cucn'.")

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error_msg("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # NOTE(review): message text says 'users_queue' although this branch
        # handles 'users_token' -- suspected copy-paste slip; verify.
        error_msg("Token policy 'users_queue' has be deprecated since 0.49.1. Use\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error_msg("Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        # A codec implies a fixed code unit width; complain if the configured
        # buffer element size does not match it.
        if   setup.buffer_codec != CodecName:                        return
        elif setup.buffer_element_size == RequiredBufferElementSize: return
        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)
        error_msg("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) + \
                  "Consult command line argument '--buffer-element-size'.")

    if setup.buffer_codec != "":
        verify_word_in_list(setup.buffer_codec,
                            codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                            "Codec '%s' is not supported." % setup.buffer_codec)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    # Path Compression
    if setup.compression_path_uniform_f and setup.compression_path_f:
        error_msg("Both flags for path compression were set: '--path-compression' and\n"
                  "'--path-compression-uniform'. Please, choose only one!")
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- global setup object carrying all parameter values.
    command_line -- GetPot wrapper around the command line.
    argv         -- raw argument list (used to count option occurrences).

    Every detected inconsistency is reported via 'error.log()'.
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check, if the output directory exists
        if os.access(setup.output_directory, os.F_OK) == False:
            error.log("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error.log("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error.log("Character display must be either 'hex' or 'utf8'.\nFound: '%s'" % setup.character_display)

    # ensure that options are not specified twice
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        # List-type options may legitimately appear repeatedly; all others only once.
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Depraceted' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options  = SETUP_INFO[name][0]
        comment               = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort ignoring leading dashes, so short and long forms group together.
    options.sort(lambda a,b: cmp(a.replace("-",""), b.replace("-","")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # Buffer element width must be a supported size; -1 = undetermined.
    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error.log("The setting of '--buffer-element-size' (or '-b') can only be\n"
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error.log("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search("--token-class", "--tc") == False:
        error.log("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error.log("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        if setup.token_queue_size == setup.token_queue_safety_border: cmp_str = "equal to"
        else:                                                         cmp_str = "less than"
        error.log("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) + \
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    __check_file_name(setup, "token_class_file",            "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file", "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file",
                      "file containing user token ids", 0,
                      CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f:                 converter_n += 1
    if setup.converter_icu_f:                   converter_n += 1
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error.log("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n" + \
                  "to be used mutually exclusively.")

    if converter_n == 1 and setup.buffer_codec.name != "unicode":
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed, so
        # that it works directly on the codec.
        error.log("An engine that is to be generated for a specific codec cannot rely\n" + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a converter has been specified and no bytes-element-size has been specified,
    # it defaults to '1 byte' which is most likely not what is desired for unicode.
    if converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error.log("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and setup.converter_ucs_coding_name == "" \
       and converter_n != 0:
        tc = setup.buffer_element_type
        error.log("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" + \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error.log("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # NOTE(review): message text says 'users_queue' although this branch
        # handles 'users_token' -- suspected copy-paste slip; verify.
        error.log("Token policy 'users_queue' has be deprecated since 0.49.1. Use\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error.log("Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        # A codec implies a fixed code unit width; complain if the configured
        # buffer element size does not match it.
        if   setup.buffer_codec.name != CodecName:                   return
        elif setup.buffer_element_size == RequiredBufferElementSize: return
        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)
        error.log("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) + \
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        if not setup.buffer_codec_file:
            error.verify_word_in_list(setup.buffer_codec_name,
                                      codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                                      "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    # An external lexeme null object implies an external token class; generating
    # a 'shared' token class at the same time contradicts that.
    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error.log("Specifying an external lexeme null object signalizes an\n"
                  "external token class implementation. The 'token class only\n"
                  "flag' generates a token class considered to be externally\n"
                  "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list:
            error.warning("The warning upon missing 'take_text' in token type definition is de-\n" + \
                          "activated by '--suppress %i'. This is dangerous, if there is a string\n" % error_n + \
                          "accumulator. May be, use '--no-string-accumulator'.",
                          -1,
                          SuppressCode=NotificationDB.warning_on_no_warning_on_missing_take_text)
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- global setup object carrying all parameter values.
    command_line -- GetPot wrapper around the command line.
    argv         -- raw argument list (used to count option occurrences).

    Every detected inconsistency is reported via 'error_msg()'.
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check, if the output directory exists
        if os.access(setup.output_directory, os.F_OK) == False:
            error_msg(
                "The directory %s was specified for output, but does not exists."
                % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error_msg(
                "The directory %s was specified for output, but is not writeable."
                % setup.output_directory)

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error_msg(
            "Character display must be either 'hex' or 'utf8'.\nFound: '%s'"
            % setup.character_display)

    # ensure that options are not specified twice
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        # List-type options may legitimately appear repeatedly; all others only once.
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Depraceted' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options  = SETUP_INFO[name][0]
        comment               = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort ignoring leading dashes, so short and long forms group together.
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error_msg("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # Buffer element width must be a supported size; -1 = undetermined.
    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error_msg(
            "The setting of '--buffer-element-size' (or '-b') can only be\n"
            "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search(
            "--token-class", "--tc") == False:
        error_msg(
            "The use of a manually written token class requires that the name of the class\n"
            "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search(
            "--token-queue-size"):
        error_msg("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        if setup.token_queue_size == setup.token_queue_safety_border: cmp_str = "equal to"
        else:                                                         cmp_str = "less than"
        error_msg("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) + \
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    __check_file_name(setup, "token_class_file",            "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file", "file containing user derived lexer class")
    __check_file_name(
        setup, "token_id_foreign_definition_file",
        "file containing user token ids", 0,
        CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f:                 converter_n += 1
    if setup.converter_icu_f:                   converter_n += 1
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error_msg("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n" + \
                  "to be used mutually exclusively.")

    if converter_n == 1 and setup.buffer_codec.name != "unicode":
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed, so
        # that it works directly on the codec.
        error_msg("An engine that is to be generated for a specific codec cannot rely\n" + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a converter has been specified and no bytes-element-size has been specified,
    # it defaults to '1 byte' which is most likely not what is desired for unicode.
    if converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error_msg("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and setup.converter_ucs_coding_name == "" \
       and converter_n != 0:
        tc = setup.buffer_element_type
        error_msg("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" + \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error_msg("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # NOTE(review): message text says 'users_queue' although this branch
        # handles 'users_token' -- suspected copy-paste slip; verify.
        error_msg(
            "Token policy 'users_queue' has be deprecated since 0.49.1. Use\n"
            "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error_msg(
            "Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        # A codec implies a fixed code unit width; complain if the configured
        # buffer element size does not match it.
        if   setup.buffer_codec.name != CodecName:                   return
        elif setup.buffer_element_size == RequiredBufferElementSize: return
        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)
        error_msg("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) + \
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        if not setup.buffer_codec_file:
            verify_word_in_list(
                setup.buffer_codec_name,
                codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    # An external lexeme null object implies an external token class; generating
    # a 'shared' token class at the same time contradicts that.
    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error_msg(
            "Specifying an external lexeme null object signalizes an\n"
            "external token class implementation. The 'token class only\n"
            "flag' generates a token class considered to be externally\n"
            "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list:
            error_msg(
                "The warning upon missing 'take_text' in token type definition is de-\n" +
                "activated by '--suppress %i'. This is dangerous, if there is a string\n" % error_n +
                "accumulator. May be, use '--no-string-accumulator'.",
                DontExitF=True, WarningF=True,
                SuppressCode=NotificationDB.warning_on_no_warning_on_missing_take_text)
def do(setup, command_line, argv):
    """Consistency check of 'setup' against the raw command line.

    Emits error messages (via error_msg, which terminates the process
    unless DontExitF is given) for inconsistent or conflicting options.
    Order of the checks matters: later checks assume earlier ones passed.

    setup        -- global setup object whose attributes were filled from argv.
    command_line -- GetPot wrapper around argv.
    argv         -- raw argument list (used for duplicate-option counting).
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Output directory must exist and be writeable.
        if os.access(setup.output_directory, os.F_OK) == False:
            error_msg("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error_msg("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # If the mode is 'plotting', then check whether a graphic format is specified.
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    if setup.plot_character_display not in ["hex", "utf8"]:
        error_msg("Plot character display must be either 'hex' or 'utf8'.\nFound: '%s'" % setup.plot_character_display)

    # Ensure that options are not specified twice (count occurrences in raw argv).
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue   # entries without a description list are not command line options
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options      = SETUP_INFO[name][0]
        comment                   = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options _____________________________________________________
    # Collect every known option string; anything else on the command line is unidentified.
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED:  continue
        if info[1] != None:    options.extend(info[0])
    # Sort ignoring dashes so that e.g. '--foo' and '-f' interleave alphabetically.
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        error_msg("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    # A derived class name without the file that defines it is useless.
    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # -1 means 'undetermined, derive from buffer element type later'.
    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error_msg("The setting of '--buffer-element-size' (or '-b') can only be\n"
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search("--token-class", "--tc") == False:
        error_msg("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")

    # Token queue: a fixed queue size only makes sense with the 'queue' policy.
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error_msg("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")

    # The safety border must leave at least one usable queue slot.
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        if setup.token_queue_size == setup.token_queue_safety_border:
            cmp_str = "equal to"
        else:
            cmp_str = "less than"
        # NOTE(review): message reads 'size is %i is %s' -- likely a typo for
        # 'size %i is %s'; left untouched since it is a runtime string.
        error_msg("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) +
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name",   "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    # Check that file-name options contain plausible file names.
    __check_file_name(setup, "token_class_file",                 "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",      "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file", "file containing user token ids")
    __check_file_name(setup, "input_mode_files",                 "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f:             converter_n += 1
    if setup.converter_icu_f:               converter_n += 1
    if setup.converter_user_new_func != "": converter_n += 1
    if converter_n > 1:
        error_msg("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n" + \
                  "to be used mutually exclusively.")

    # Converters and codec-specific engines are mutually exclusive.
    if converter_n == 1 and setup.buffer_codec != "":
        error_msg("An engine that is to be generated for a specific codec cannot rely\n" + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and setup.converter_ucs_coding_name == "" \
       and converter_n != 0:
        tc = setup.buffer_element_type
        error_msg("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" + \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option '--converter-ucs-coding-name' or '--cucn'.")

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error_msg("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # NOTE(review): this branch handles 'users_token' but the message names
        # 'users_queue' -- looks like a copy/paste slip in the runtime string.
        error_msg("Token policy 'users_queue' has be deprecated since 0.49.1. Use\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error_msg("Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        # No-op unless 'CodecName' is the configured codec AND the element
        # size disagrees with what that codec requires.
        if setup.buffer_codec != CodecName:                          return
        elif setup.buffer_element_size == RequiredBufferElementSize: return
        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)
        error_msg("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) +
                  "Consult command line argument '--buffer-element-size'.")

    if setup.buffer_codec != "":
        verify_word_in_list(setup.buffer_codec,
                            codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                            "Codec '%s' is not supported." % setup.buffer_codec)
        # utf8 demands 1-byte elements, utf16 demands 2-byte elements.
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    # Path Compression: the two flag variants are alternatives, not additive.
    if setup.compression_path_uniform_f and setup.compression_path_f:
        error_msg("Both flags for path compression were set: '--path-compression' and\n"
                  "'--path-compression-uniform'. Please, choose only one!")
def do(argv):
    """Interpret the command line and fill the global 'setup' object.

    RETURN: True,  if the code generation process needs to be started.
            False, if the job is already done (e.g. --version, --help,
                   --plot-format-list only print information).

    Side effects: mutates the module-global 'setup' (attribute per entry of
    SETUP_INFO, plus derived names/files), may register a token type
    definition in 'lexer_mode', and calls validation.do() which can
    terminate the process on inconsistent input.
    """
    global setup

    # (*) Interpret Command Line (A) _____________________________________________________
    command_line = GetPot(argv)

    if command_line.search("--version", "-v"):
        print "Quex - Fast Universal Lexical Analyzer Generator"
        print "Version " + QUEX_VERSION
        print "(C) 2006-2010 Frank-Rene Schaefer"
        print "ABSOLUTELY NO WARRANTY"
        return False

    if command_line.search("--help", "-h"):
        print "Quex - Fast Universal Lexical Analyzer Generator"
        print "Please, consult the quex documentation for further help, or"
        print "visit http://quex.org"
        print "(C) 2006-2010 Frank-Rene Schaefer"
        print "ABSOLUTELY NO WARRANTY"
        return False

    if command_line.search("--plot-format-list"):
        print quex.output.graphviz.interface.get_supported_graphic_format_description()
        return False

    for variable_name, info in SETUP_INFO.items():
        # Some parameters are not set on the command line. Their entry is not associated
        # with a description list.
        if type(info) != list: continue
        if info[1] == FLAG:
            # Flag option: True iff present on the command line.
            setup.__dict__[variable_name] = command_line.search(info[0])
        elif info[1] == NEGATED_FLAG:
            # Negated flag: True iff NOT present on the command line.
            setup.__dict__[variable_name] = not command_line.search(info[0])
        elif info[1] == LIST:
            # List option: collect all non-option followers; accumulate if the
            # attribute already holds a list (e.g. from a previous pass).
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = []
            else:
                the_list = command_line.nominus_followers(info[0])
                if the_list == []:
                    error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])
                if setup.__dict__.has_key(variable_name):
                    setup.__dict__[variable_name].extend(the_list)
                else:
                    setup.__dict__[variable_name] = the_list
        elif command_line.search(info[0]):
            # Value option present: read its follower.
            # NOTE(review): the next 'if' re-tests the condition the 'elif'
            # just established, so its body appears unreachable -- unless
            # GetPot.search() has cursor side effects. Confirm against the
            # GetPot implementation before touching.
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = info[1]
            else:
                value = command_line.follow("--EMPTY--", info[0])
                if value == "--EMPTY--":
                    error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])
                setup.__dict__[variable_name] = value

    # (*) Classes and their namespace
    setup.analyzer_class_name, \
    setup.analyzer_name_space, \
    setup.analyzer_name_safe   = \
        read_namespaced_name(setup.analyzer_class_name,
                             "analyzer engine (options -o, --engine, --analyzer-class)")

    setup.analyzer_derived_class_name,       \
    setup.analyzer_derived_class_name_space, \
    setup.analyzer_derived_class_name_safe = \
        read_namespaced_name(setup.analyzer_derived_class_name,
                             "derived analyzer class (options --derived-class, --dc)",
                             AllowEmptyF=True)

    # Default namespace for the analyzer is 'quex'.
    if setup.analyzer_name_space == []:
        setup.analyzer_name_space = ["quex"]

    # Default token class name: 'Token' inside the analyzer's namespace.
    if setup.token_class_name == "":
        setup.token_class_name = "%s::Token" % reduce(lambda a, b: a + "::" + b, setup.analyzer_name_space)

    # Token classes and derived classes have the freedom not to open a namespace,
    # thus no check 'if namespace == []'.
    setup.token_class_name, \
    setup.token_class_name_space, \
    setup.token_class_name_safe = \
        read_namespaced_name(setup.token_class_name,
                             "token class (options --token-class, --tc)")

    if setup.token_class_file != "":
        # A manually written token class file supersedes the generated one.
        lexer_mode.token_type_definition = \
                ManualTokenClassSetup(setup.token_class_file,
                                      setup.token_class_name,
                                      setup.token_class_name_space,
                                      setup.token_class_name_safe,
                                      setup.token_id_type)

    if setup.token_class_name_space == []:
        setup.token_class_name_space = deepcopy(setup.analyzer_name_space)

    setup.token_id_prefix_plain, \
    setup.token_id_prefix_name_space, \
    dummy = \
        read_namespaced_name(setup.token_id_prefix,
                             "token prefix (options --token-prefix)")

    if len(setup.token_id_prefix_name_space) != 0 and setup.language.upper() == "C":
        error_msg("Token id prefix cannot contain a namespaces if '--language' is set to 'C'.")

    # (*) Output programming language
    setup.language = setup.language.upper()
    verify_word_in_list(setup.language,
                        quex_core_engine_generator_languages_db.keys(),
                        "Programming language '%s' is not supported." % setup.language)
    setup.language_db  = quex_core_engine_generator_languages_db[setup.language]
    setup.extension_db = global_extension_db[setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    if setup.extension_db.has_key(setup.output_file_naming_scheme) == False:
        error_msg("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (setup.output_file_naming_scheme, setup.language) + \
                  "Available schemes are: %s." % repr(setup.extension_db.keys())[1:-1])

    # Before file names can be prepared, determine the output directory
    # If 'source packaging' is enabled and no output directory is specified
    # then take the directory of the source packaging.
    if setup.source_package_directory != "" and setup.output_directory == "":
        setup.output_directory = setup.source_package_directory

    # (*) Output files
    prepare_file_names(setup)

    if setup.buffer_byte_order == "<system>":
        setup.buffer_byte_order = sys.byteorder
        setup.byte_order_is_that_of_current_system_f = True
    else:
        setup.byte_order_is_that_of_current_system_f = False

    if setup.buffer_element_size == "wchar_t":
        error_msg("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                  "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                  "'--buffer-element-type wchar_t' or '--bet'.")

    if setup.buffer_element_type == "wchar_t":
        setup.converter_ucs_coding_name = "WCHAR_T"

    if setup.buffer_codec != "":
        # With a codec-specific engine the element size follows from the codec.
        setup.buffer_element_size_irrelevant = True

    make_numbers(setup)

    # (*) Determine buffer element type and size (in bytes)
    if setup.buffer_element_size == -1:
        if global_character_type_db.has_key(setup.buffer_element_type):
            # Index 3 of a character type entry holds its size in bytes
            # (per usage here -- confirm against global_character_type_db).
            setup.buffer_element_size = global_character_type_db[setup.buffer_element_type][3]
        elif setup.buffer_element_type == "":
            setup.buffer_element_size = 1
        else:
            # If the buffer element type is defined, then here we know that it is 'unknown'
            # and Quex cannot know its size on its own.
            setup.buffer_element_size = -1

    if setup.buffer_element_type == "":
        if setup.buffer_element_size in [1, 2, 4]:
            setup.buffer_element_type = {
                1: "uint8_t", 2: "uint16_t", 4: "uint32_t",
            }[setup.buffer_element_size]
        elif setup.buffer_element_size == -1:
            pass   # neither size nor type determinable; leave for later validation
        else:
            error_msg("Buffer element type cannot be determined for size '%i' which\n" \
                      % setup.buffer_element_size + \
                      "has been specified by '-b' or '--buffer-element-size'.")

    # Codec transformation: utf8/utf16 use dedicated state-split algorithms.
    if setup.buffer_codec in ["utf8", "utf16"]:
        setup.buffer_codec_transformation_info = setup.buffer_codec + "-state-split"
    elif setup.buffer_codec != "":
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(setup.buffer_codec)

    setup.converter_f = False
    if setup.converter_iconv_f or setup.converter_icu_f:
        setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF] is used.
    setup.converter_helper_required_f = True
    if setup.converter_f == False and setup.buffer_element_size == 1 and setup.buffer_codec == "":
        setup.converter_helper_required_f = False

    validation.do(setup, command_line, argv)

    if setup.converter_ucs_coding_name == "":
        if global_character_type_db.has_key(setup.buffer_element_type):
            # Entry indices 1/2 hold the little/big endian coding names
            # (per usage here -- confirm against global_character_type_db).
            if setup.buffer_byte_order == "little":
                index = 1
            else:
                index = 2
            setup.converter_ucs_coding_name = global_character_type_db[setup.buffer_element_type][index]

    if setup.token_id_foreign_definition_file != "":
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
        #
        parse_token_id_file(setup.token_id_foreign_definition_file,
                            setup.token_id_prefix,
                            CommentDelimiterList, IncludeRE)
        if setup.token_id_prefix_plain != setup.token_id_prefix:
            # The 'plain' name space less token indices are also supported
            parse_token_id_file(setup.token_id_foreign_definition_file,
                                setup.token_id_prefix_plain,
                                CommentDelimiterList, IncludeRE)

    # (*) return setup ___________________________________________________________________
    return True
def validate(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    Errors are reported through error_msg, which terminates the process
    unless called with DontExitF=True. Checks run in a fixed order; a
    fatal check stops everything after it.
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Output directory must exist and be writeable.
        if os.access(setup.output_directory, os.F_OK) == False:
            error_msg("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error_msg("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # If the mode is 'plotting', then check whether a graphic format is specified.
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    # Ensure that options are not specified twice (count occurrences in raw argv).
    for parameter, info in SETUP_INFO.items():
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options      = SETUP_INFO[name][0]
        comment                   = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % depreciated_since_version + \
                          "http://quex.sourceforge.net for download---Or use a more advanced approach.\n" + \
                          comment)

    # (*) Check for 'Straying' Options _____________________________________________________
    # Collect every known option string; anything else on the command line is unidentified.
    options = []
    for key, info in SETUP_INFO.items():
        if key in DEPRECATED: continue
        if info[1] != None:   options.extend(info[0])
    # Sort ignoring dashes so that e.g. '--foo' and '-f' interleave alphabetically.
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        error_msg("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    # A derived class name without the file that defines it is useless.
    if setup.input_derived_class_name != "" and \
       setup.input_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.input_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # check validity: bytes per code point arrives as a string from the parser.
    bpc = setup.bytes_per_ucs_code_point
    if bpc != "wchar_t":
        if bpc not in ["1", "2", "4"]:
            # NOTE(review): sys.exit(-1) after error_msg -- presumably a
            # belt-and-braces exit in case error_msg returns; confirm whether
            # error_msg already terminates.
            error_msg("choice for --bytes-per-ucs-code-point: %s" % bpc + \
                      "quex only supports 1, 2, or 4 bytes per character in internal engine")
            sys.exit(-1)
        else:
            # Normalize to an integer once validated.
            setup.bytes_per_ucs_code_point = int(setup.bytes_per_ucs_code_point)

    if setup.byte_order == "<system>":
        setup.byte_order = sys.byteorder
    elif setup.byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little' or 'big'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # token offset and several ids: the three special values must not collide.
    if setup.input_token_counter_offset == setup.token_id_termination:
        error_msg("Token id offset (--token-offset) == token id for termination (--token-id-termination)\n")

    if setup.input_token_counter_offset == setup.token_id_uninitialized:
        error_msg("Token id offset (--token-offset) == token id for uninitialized (--token-id-uninitialized)\n")

    if setup.token_id_termination == setup.token_id_uninitialized:
        # Suspicious but not fatal (DontExitF=True).
        error_msg("Token id for termination (--token-id-termination) and uninitialized (--token-id-uninitialized)\n" + \
                  "are chosen to be the same. Maybe it works.", DontExitF=True)

    if setup.input_token_counter_offset < setup.token_id_uninitialized:
        error_msg("Token id offset (--token-offset) < token id uninitialized (--token-id-uninitialized).\n" + \
                  "Maybe it works.", DontExitF=True)

    if setup.input_token_counter_offset < setup.token_id_termination:
        error_msg("Token id offset (--token-offset) < token id termination (--token-id-termination).\n" + \
                  "Maybe it works.", DontExitF=True)

    # check that names are valid identifiers
    __check_identifier(setup, "input_token_id_prefix", "Token prefix")
    __check_identifier(setup, "output_engine_name",    "Engine name")
    if setup.input_derived_class_name != "":
        __check_identifier(setup, "input_derived_class_name", "Derived class name")
    if setup.input_token_class_name != "":
        __check_identifier(setup, "input_token_class_name", "Token class name")

    # '--token-class' and '--token-class-file' needs to appear together
    if setup.input_token_class_name != "" and setup.input_token_class_file == "":
        error_msg("User defined token class '%s':\n" % setup.input_token_class_name + \
                  "Specifying a user-defined token class via '--token-class' requires\n" + \
                  "that the token class file, also, needs to be specified via '--token-class-file'.")
    if setup.input_token_class_file != "" and setup.input_token_class_name == "":
        error_msg("User defined token class file '%s':\n" % setup.input_token_class_file + \
                  "Specifying a user-defined token class file via '--token-class-file' requires\n" + \
                  "that the token class, also, needs to be specified via '--token-class'.")

    # __check_identifier("token_id_termination", "Token id for termination")
    # __check_identifier("token_id_uninitialized", "Token id for uninitialized")

    # Check that file-name options contain plausible file names.
    __check_file_name(setup, "input_token_class_file",      "file containing user defined token class")
    __check_file_name(setup, "input_derived_class_file",    "file containing user derived lexer class")
    __check_file_name(setup, "input_foreign_token_id_file", "file containing user token ids")
    __check_file_name(setup, "input_user_token_id_file",    "file containing user token ids")
    __check_file_name(setup, "input_mode_files",            "quex source file")
def do(argv):
    """Interpret the command line and fill the global 'setup' object.

    Prints version/help and exits directly (sys.exit) for the informational
    options. Otherwise assigns one setup attribute per SETUP_INFO entry,
    derives the output file names, normalizes integer options, runs
    validate(), and optionally parses a foreign token id file.

    RETURN: None (results are left in the global 'setup').
    """
    global setup

    # (*) Interpret Command Line (A) _____________________________________________________
    command_line = GetPot(argv)

    if command_line.search("--version", "-v"):
        print "Quex - A Mode Oriented Lexical Analyser"
        print "Version " + QUEX_VERSION
        print "(C) 2006-2008 Frank-Rene Schaefer"
        sys.exit(0)

    if command_line.search("--help", "-h"):
        print "Quex - A Mode Oriented Lexical Analyser"
        print "Please, consult the quex documentation for further help, or"
        print "visit http://quex.sourceforge.net."
        print "(C) 2006-2008 Frank-Rene Schaefer"
        sys.exit(0)

    for variable_name, info in SETUP_INFO.items():
        if info[1] == LIST:
            # List option: collect all non-option followers; accumulate if the
            # attribute already holds a list.
            the_list = command_line.nominus_followers(info[0])
            if setup.__dict__.has_key(variable_name):
                setup.__dict__[variable_name].extend(the_list)
            else:
                setup.__dict__[variable_name] = the_list
        elif info[1] == FLAG:
            # Flag option: True iff present on the command line.
            setup.__dict__[variable_name] = command_line.search(info[0])
        else:
            # Value option: info[1] serves as the default value.
            setup.__dict__[variable_name] = command_line.follow(info[1], info[0])

    setup.QUEX_VERSION          = QUEX_VERSION
    setup.QUEX_INSTALLATION_DIR = QUEX_INSTALLATION_DIR
    setup.QUEX_TEMPLATE_DB_DIR  = QUEX_TEMPLATE_DB_DIR

    # (*) Output files: one name per generated artifact, derived from the stem.
    setup.output_file_stem        = __prepare_file_name(setup, "")
    setup.output_token_id_file    = __prepare_file_name(setup, "-token_ids")
    setup.output_header_file      = __prepare_file_name(setup, "-internal.h")
    setup.output_code_file        = __prepare_file_name(setup, ".cpp")
    setup.output_core_engine_file = __prepare_file_name(setup, "-core-engine.cpp")

    # Numeric options arrive as strings; convert (with error reporting).
    setup.buffer_limit_code = __get_integer(setup.buffer_limit_code, "--buffer-limit")
    setup.control_character_code_list = [setup.buffer_limit_code]
    setup.input_token_counter_offset = __get_integer(setup.input_token_counter_offset, "--token-offset")
    setup.token_id_termination = __get_integer(setup.token_id_termination, "--token-id-termination")
    setup.token_id_uninitialized = __get_integer(setup.token_id_uninitialized, "--token-id-uninitialized")

    validate(setup, command_line, argv)

    if setup.input_foreign_token_id_file != "":
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
        #
        parse_token_id_file(setup.input_foreign_token_id_file,
                            setup.input_token_id_prefix,
                            CommentDelimiterList, IncludeRE)

    # (*) Default values
    #     (Please, do not change this, otherwise no 'empty' options can be detected.)
    if setup.input_token_class_file == "":
        setup.input_token_class_file = SETUP_INFO["input_token_class_file"][2]
    if setup.input_token_class_name == "":
        setup.input_token_class_name = SETUP_INFO["input_token_class_name"][2]

    # (*) return setup ___________________________________________________________________
    return
def do(argv):
    """Interpret the command line and fill the global 'setup' object.

    RETURN: True,  if the code generation process needs to be started.
            False, if the job is done (--version, --help,
                   --plot-format-list just print information).

    NOTE(review): this function appears to be a near-verbatim duplicate of
    the earlier 'do(argv)' variant in this file -- candidates for merging.
    """
    global setup

    # (*) Interpret Command Line (A) _____________________________________________________
    command_line = GetPot(argv)

    if command_line.search("--version", "-v"):
        print "Quex - Fast Universal Lexical Analyzer Generator"
        print "Version " + QUEX_VERSION
        print "(C) 2006-2010 Frank-Rene Schaefer"
        print "ABSOLUTELY NO WARRANTY"
        return False

    if command_line.search("--help", "-h"):
        print "Quex - Fast Universal Lexical Analyzer Generator"
        print "Please, consult the quex documentation for further help, or"
        print "visit http://quex.org"
        print "(C) 2006-2010 Frank-Rene Schaefer"
        print "ABSOLUTELY NO WARRANTY"
        return False

    if command_line.search("--plot-format-list"):
        print quex.output.graphviz.interface.get_supported_graphic_format_description()
        return False

    for variable_name, info in SETUP_INFO.items():
        # Some parameters are not set on the command line. Their entry is not associated
        # with a description list.
        if type(info) != list: continue
        if info[1] == FLAG:
            # Flag option: True iff present on the command line.
            setup.__dict__[variable_name] = command_line.search(info[0])
        elif info[1] == NEGATED_FLAG:
            # Negated flag: True iff NOT present on the command line.
            setup.__dict__[variable_name] = not command_line.search(info[0])
        elif info[1] == LIST:
            # List option: collect all non-option followers; accumulate if the
            # attribute already holds a list.
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = []
            else:
                the_list = command_line.nominus_followers(info[0])
                if the_list == []:
                    error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])
                if setup.__dict__.has_key(variable_name):
                    setup.__dict__[variable_name].extend(the_list)
                else:
                    setup.__dict__[variable_name] = the_list
        elif command_line.search(info[0]):
            # Value option present: read its follower.
            # NOTE(review): the next 'if' re-tests the condition that the
            # 'elif' just established, so its body appears unreachable --
            # unless GetPot.search() has cursor side effects. Confirm against
            # the GetPot implementation before touching.
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = info[1]
            else:
                value = command_line.follow("--EMPTY--", info[0])
                if value == "--EMPTY--":
                    error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])
                setup.__dict__[variable_name] = value

    # (*) Classes and their namespace
    setup.analyzer_class_name, \
    setup.analyzer_name_space, \
    setup.analyzer_name_safe   = \
        read_namespaced_name(setup.analyzer_class_name,
                             "analyzer engine (options -o, --engine, --analyzer-class)")

    setup.analyzer_derived_class_name,       \
    setup.analyzer_derived_class_name_space, \
    setup.analyzer_derived_class_name_safe = \
        read_namespaced_name(setup.analyzer_derived_class_name,
                             "derived analyzer class (options --derived-class, --dc)",
                             AllowEmptyF=True)

    # Default namespace for the analyzer is 'quex'.
    if setup.analyzer_name_space == []:
        setup.analyzer_name_space = ["quex"]

    # Default token class name: 'Token' inside the analyzer's namespace.
    if setup.token_class_name == "":
        setup.token_class_name = "%s::Token" % reduce(lambda a, b: a + "::" + b, setup.analyzer_name_space)

    # Token classes and derived classes have the freedom not to open a namespace,
    # thus no check 'if namespace == []'.
    setup.token_class_name, \
    setup.token_class_name_space, \
    setup.token_class_name_safe = \
        read_namespaced_name(setup.token_class_name,
                             "token class (options --token-class, --tc)")

    if setup.token_class_file != "":
        # A manually written token class file supersedes the generated one.
        lexer_mode.token_type_definition = \
                ManualTokenClassSetup(setup.token_class_file,
                                      setup.token_class_name,
                                      setup.token_class_name_space,
                                      setup.token_class_name_safe,
                                      setup.token_id_type)

    if setup.token_class_name_space == []:
        setup.token_class_name_space = deepcopy(setup.analyzer_name_space)

    setup.token_id_prefix_plain, \
    setup.token_id_prefix_name_space, \
    dummy = \
        read_namespaced_name(setup.token_id_prefix,
                             "token prefix (options --token-prefix)")

    if len(setup.token_id_prefix_name_space) != 0 and setup.language.upper() == "C":
        error_msg("Token id prefix cannot contain a namespaces if '--language' is set to 'C'.")

    # (*) Output programming language
    setup.language = setup.language.upper()
    verify_word_in_list(setup.language,
                        quex_core_engine_generator_languages_db.keys(),
                        "Programming language '%s' is not supported." % setup.language)
    setup.language_db  = quex_core_engine_generator_languages_db[setup.language]
    setup.extension_db = global_extension_db[setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    if setup.extension_db.has_key(setup.output_file_naming_scheme) == False:
        error_msg("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (setup.output_file_naming_scheme, setup.language) + \
                  "Available schemes are: %s." % repr(setup.extension_db.keys())[1:-1])

    # Before file names can be prepared, determine the output directory
    # If 'source packaging' is enabled and no output directory is specified
    # then take the directory of the source packaging.
    if setup.source_package_directory != "" and setup.output_directory == "":
        setup.output_directory = setup.source_package_directory

    # (*) Output files
    prepare_file_names(setup)

    if setup.buffer_byte_order == "<system>":
        setup.buffer_byte_order = sys.byteorder
        setup.byte_order_is_that_of_current_system_f = True
    else:
        setup.byte_order_is_that_of_current_system_f = False

    if setup.buffer_element_size == "wchar_t":
        error_msg("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                  "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                  "'--buffer-element-type wchar_t' or '--bet'.")

    if setup.buffer_element_type == "wchar_t":
        setup.converter_ucs_coding_name = "WCHAR_T"

    if setup.buffer_codec != "":
        # With a codec-specific engine the element size follows from the codec.
        setup.buffer_element_size_irrelevant = True

    make_numbers(setup)

    # (*) Determine buffer element type and size (in bytes)
    if setup.buffer_element_size == -1:
        if global_character_type_db.has_key(setup.buffer_element_type):
            # Index 3 of a character type entry holds its size in bytes
            # (per usage here -- confirm against global_character_type_db).
            setup.buffer_element_size = global_character_type_db[setup.buffer_element_type][3]
        elif setup.buffer_element_type == "":
            setup.buffer_element_size = 1
        else:
            # If the buffer element type is defined, then here we know that it is 'unknown'
            # and Quex cannot know its size on its own.
            setup.buffer_element_size = -1

    if setup.buffer_element_type == "":
        if setup.buffer_element_size in [1, 2, 4]:
            setup.buffer_element_type = {
                1: "uint8_t", 2: "uint16_t", 4: "uint32_t",
            }[setup.buffer_element_size]
        elif setup.buffer_element_size == -1:
            pass   # neither size nor type determinable; leave for later validation
        else:
            error_msg("Buffer element type cannot be determined for size '%i' which\n" \
                      % setup.buffer_element_size + \
                      "has been specified by '-b' or '--buffer-element-size'.")

    # Codec transformation: utf8/utf16 use dedicated state-split algorithms.
    if setup.buffer_codec in ["utf8", "utf16"]:
        setup.buffer_codec_transformation_info = setup.buffer_codec + "-state-split"
    elif setup.buffer_codec != "":
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(setup.buffer_codec)

    setup.converter_f = False
    if setup.converter_iconv_f or setup.converter_icu_f:
        setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF] is used.
    setup.converter_helper_required_f = True
    if setup.converter_f == False and setup.buffer_element_size == 1 and setup.buffer_codec == "":
        setup.converter_helper_required_f = False

    validation.do(setup, command_line, argv)

    if setup.converter_ucs_coding_name == "":
        if global_character_type_db.has_key(setup.buffer_element_type):
            # Entry indices 1/2 hold the little/big endian coding names
            # (per usage here -- confirm against global_character_type_db).
            if setup.buffer_byte_order == "little":
                index = 1
            else:
                index = 2
            setup.converter_ucs_coding_name = global_character_type_db[setup.buffer_element_type][index]

    if setup.token_id_foreign_definition_file != "":
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
        #
        parse_token_id_file(setup.token_id_foreign_definition_file,
                            setup.token_id_prefix,
                            CommentDelimiterList, IncludeRE)
        if setup.token_id_prefix_plain != setup.token_id_prefix:
            # The 'plain' name space less token indices are also supported
            parse_token_id_file(setup.token_id_foreign_definition_file,
                                setup.token_id_prefix_plain,
                                CommentDelimiterList, IncludeRE)

    # (*) return setup ___________________________________________________________________
    return True
def validate(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- global setup object whose attributes were filled from the
                    command line; some attributes are normalized in place
                    (output_directory, bytes_per_ucs_code_point, byte_order).
    command_line -- GetPot-style command line wrapper (provides .search() and
                    .unidentified_options()).
    argv         -- raw argument list, used for checks that need the literal
                    option strings (duplicate detection, plot-format check).

    NOTE(review): error_msg() presumably prints the message and terminates
    unless called with DontExitF=True -- confirm against its definition.
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Check, if the output directory exists
        if os.access(setup.output_directory, os.F_OK) == False:
            error_msg("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error_msg("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # If the mode is 'plotting', then check whether a graphic format is specified.
    # The check scans the raw argv, since the option may appear without a follower.
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    # Ensure that options are not specified twice. Each SETUP_INFO entry's
    # info[0] is the list of command line spellings for one parameter.
    for parameter, info in SETUP_INFO.items():
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    #     DEPRECATED maps a parameter name to (explanatory comment, last supporting version).
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % depreciated_since_version + \
                          "http://quex.sourceforge.net for download---Or use a more advanced approach.\n" + \
                          comment)

    # (*) Check for 'Straying' Options ___________________________________________________
    #     Collect every known (non-deprecated) option spelling, then ask the
    #     command line wrapper which given options are not in that list.
    options = []
    for key, info in SETUP_INFO.items():
        if key in DEPRECATED: continue
        if info[1] != None: options.extend(info[0])
    # Sort ignoring the leading dashes, so '-b' and '--buffer-...' interleave alphabetically.
    options.sort(lambda a,b: cmp(a.replace("-",""), b.replace("-","")))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        error_msg("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    # A derived class name without the file that defines it is unusable.
    if setup.input_derived_class_name != "" and \
       setup.input_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.input_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # Check validity of the bytes-per-code-point choice; the string value from
    # the command line is converted to int here (except for 'wchar_t').
    bpc = setup.bytes_per_ucs_code_point
    if bpc != "wchar_t":
        if bpc not in ["1", "2", "4"]:
            error_msg("choice for --bytes-per-ucs-code-point: %s" % bpc + \
                      "quex only supports 1, 2, or 4 bytes per character in internal engine")
            # NOTE(review): this exit is only reached if error_msg() returns.
            sys.exit(-1)
        else:
            setup.bytes_per_ucs_code_point = int(setup.bytes_per_ucs_code_point)

    # Resolve '<system>' to the actual endianness of the build machine.
    if setup.byte_order == "<system>":
        setup.byte_order = sys.byteorder
    elif setup.byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little' or 'big'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Token offset and several ids: the offset must not collide with the two
    # reserved token ids. Suspicious-but-possibly-working combinations only warn.
    if setup.input_token_counter_offset == setup.token_id_termination:
        error_msg("Token id offset (--token-offset) == token id for termination (--token-id-termination)\n")
    if setup.input_token_counter_offset == setup.token_id_uninitialized:
        error_msg("Token id offset (--token-offset) == token id for uninitialized (--token-id-uninitialized)\n")
    if setup.token_id_termination == setup.token_id_uninitialized:
        error_msg("Token id for termination (--token-id-termination) and uninitialized (--token-id-uninitialized)\n" + \
                  "are chosen to be the same. Maybe it works.", DontExitF=True)
    if setup.input_token_counter_offset < setup.token_id_uninitialized:
        error_msg("Token id offset (--token-offset) < token id uninitialized (--token-id-uninitialized).\n" + \
                  "Maybe it works.", DontExitF=True)
    if setup.input_token_counter_offset < setup.token_id_termination:
        error_msg("Token id offset (--token-offset) < token id termination (--token-id-termination).\n" + \
                  "Maybe it works.", DontExitF=True)

    # Check that names which end up in generated code are valid identifiers.
    __check_identifier(setup, "input_token_id_prefix", "Token prefix")
    __check_identifier(setup, "output_engine_name", "Engine name")
    if setup.input_derived_class_name != "":
        __check_identifier(setup, "input_derived_class_name", "Derived class name")
    if setup.input_token_class_name != "":
        __check_identifier(setup, "input_token_class_name", "Token class name")

    # '--token-class' and '--token-class-file' need to appear together.
    if setup.input_token_class_name != "" and setup.input_token_class_file == "":
        error_msg("User defined token class '%s':\n" % setup.input_token_class_name + \
                  "Specifying a user-defined token class via '--token-class' requires\n" + \
                  "that the token class file, also, needs to be specified via '--token-class-file'.")
    if setup.input_token_class_file != "" and setup.input_token_class_name == "":
        error_msg("User defined token class file '%s':\n" % setup.input_token_class_file + \
                  "Specifying a user-defined token class file via '--token-class-file' requires\n" + \
                  "that the token class, also, needs to be specified via '--token-class'.")

    # __check_identifier("token_id_termination", "Token id for termination")
    # __check_identifier("token_id_uninitialized", "Token id for uninitialized")

    # Finally, every user-provided file name must refer to an accessible file.
    __check_file_name(setup, "input_token_class_file", "file containing user defined token class")
    __check_file_name(setup, "input_derived_class_file", "file containing user derived lexer class")
    __check_file_name(setup, "input_foreign_token_id_file", "file containing user token ids")
    __check_file_name(setup, "input_user_token_id_file", "file containing user token ids")
    __check_file_name(setup, "input_mode_files", "quex source file")
def do(argv): global setup # (*) Interpret Command Line (A) _____________________________________________________ command_line = GetPot(argv) if command_line.search("--version", "-v"): print "Quex - A Mode Oriented Lexical Analyser" print "Version " + QUEX_VERSION print "(C) 2006-2008 Frank-Rene Schaefer" sys.exit(0) if command_line.search("--help", "-h"): print "Quex - A Mode Oriented Lexical Analyser" print "Please, consult the quex documentation for further help, or" print "visit http://quex.sourceforge.net." print "(C) 2006-2008 Frank-Rene Schaefer" sys.exit(0) for variable_name, info in SETUP_INFO.items(): if info[1] == LIST: the_list = command_line.nominus_followers(info[0]) if setup.__dict__.has_key(variable_name): setup.__dict__[variable_name].extend(the_list) else: setup.__dict__[variable_name] = the_list elif info[1] == FLAG: setup.__dict__[variable_name] = command_line.search(info[0]) else: setup.__dict__[variable_name] = command_line.follow(info[1], info[0]) setup.QUEX_VERSION = QUEX_VERSION setup.QUEX_INSTALLATION_DIR = QUEX_INSTALLATION_DIR setup.QUEX_TEMPLATE_DB_DIR = QUEX_TEMPLATE_DB_DIR # (*) Output files setup.output_file_stem = __prepare_file_name(setup, "") setup.output_token_id_file = __prepare_file_name(setup, "-token_ids") setup.output_header_file = __prepare_file_name(setup, "-internal.h") setup.output_code_file = __prepare_file_name(setup, ".cpp") setup.output_core_engine_file = __prepare_file_name(setup, "-core-engine.cpp") setup.buffer_limit_code = __get_integer(setup.buffer_limit_code, "--buffer-limit") setup.control_character_code_list = [setup.buffer_limit_code] setup.input_token_counter_offset = __get_integer(setup.input_token_counter_offset, "--token-offset") setup.token_id_termination = __get_integer(setup.token_id_termination, "--token-id-termination") setup.token_id_uninitialized = __get_integer(setup.token_id_uninitialized, "--token-id-uninitialized") validate(setup, command_line, argv) if setup.input_foreign_token_id_file != "": 
CommentDelimiterList = [["//", "\n"], ["/*", "*/"]] # Regular expression to find '#include <something>' and extract the 'something' # in a 'group'. Note that '(' ')' cause the storage of parts of the match. IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]" # parse_token_id_file(setup.input_foreign_token_id_file, setup.input_token_id_prefix, CommentDelimiterList, IncludeRE) # (*) Default values # (Please, do not change this, otherwise no 'empty' options can be detected.) if setup.input_token_class_file == "": setup.input_token_class_file = SETUP_INFO["input_token_class_file"][2] if setup.input_token_class_name == "": setup.input_token_class_name = SETUP_INFO["input_token_class_name"][2] # (*) return setup ___________________________________________________________________ return