Пример #1
0
 def _handle_icesquer(s: str) -> None:
     """Handle one line of the icesquer section.

     A line is expected to hold an error word and its correction(s),
     separated by a single tab. The correction part may list several
     alternatives separated by semicolons; only the first one is used.

     Lines without exactly one tab, and lines whose error word consists
     of more than one token, occur in the data and are silently skipped.

     Raises:
         ConfigError: if the error word or its first correction is empty.
     """
     fields = s.lower().split("\t")
     if len(fields) != 2:
         # Happens in the data, just skip it
         return
     word = fields[0].strip()
     if not word:
         raise ConfigError(
             "Expected nonempty word before tab in icesquer section")
     # TODO Only the first of the semicolon-separated corrections is used for now
     corr = fields[1].split(";")[0].strip()
     if not corr:
         raise ConfigError(
             "Expected nonempty word after tab in icesquer section")
     if len(word.split()) != 1:
         # Multi-word error entries happen in the data, just skip them
         return
     # The correction is stored as a tuple of its whitespace-separated tokens
     Icesquer.add(word, tuple(corr.split()))
Пример #2
0
 def _handle_error_forms(s: str) -> None:
     """Handle one line of the error_forms section.

     Expected format (semicolon-separated, seven fields)::

         wrong form;lemma;correct form;id;category;tag;errortype

     The error type selects the target registry: 'cid' entries go to
     CIDErrorForms (context-independent errors) and 'cd' entries go to
     CDErrorForms (context-dependent errors).

     Raises:
         ConfigError: if the field count is wrong, the wrong form equals the
             correct form, the id is not an integer, or the error type is
             neither 'cid' nor 'cd'.
     """
     fields = [field.strip() for field in s.split(";")]
     if len(fields) != 7:
         raise ConfigError(
             "Expected wrong form;lemma;correct form;id;category;tag;errortype"
         )
     wrong_form, lemma, correct_form, utg, category, tag, etype = fields
     if wrong_form == correct_form:
         raise ConfigError(
             "Wrong form identical to correct form for '{0}'".format(
                 wrong_form))
     try:
         utg_id = int(utg)
     except ValueError as err:
         # Report a malformed id as a ConfigError so that the caller can
         # attach file name and line number information to it
         raise ConfigError(
             "Expected integer id but got '{0}' for '{1}'".format(
                 utg, wrong_form)) from err
     meaning: ErrorFormTuple = (
         lemma,  # Lemma (stofn)
         correct_form,  # Correct form (ordmynd)
         utg_id,  # Id (utg)
         category,  # Category (ordfl)
         tag,  # Tag (beyging)
     )
     if etype == "cid":
         CIDErrorForms.add(wrong_form, meaning)  # context-independent errors
     elif etype == "cd":
         CDErrorForms.add(wrong_form, meaning)  # context-dependent errors
     else:
         raise ConfigError("Wrong error type given, expected 'cid' or 'cd'")
Пример #3
0
 def _handle_taboo_words(s: str) -> None:
     """Handle one line of the taboo_words section.

     Format: ``taboo_word [replacement[/replacement...]] ["explanation"]``.
     If no replacement is given, the taboo word itself is used as the
     replacement. Every replacement must contain exactly one underscore,
     i.e. carry a word category suffix (_xx).

     Raises:
         ConfigError: if the explanation is not enclosed in a pair of double
             quotes, if the taboo word is missing, if there are more than
             two tokens before the explanation, or if a replacement lacks
             a word category.
     """
     # Start by parsing explanation string off the end (right hand side), if present
     lquote = s.find('"')
     rquote = s.rfind('"')
     if lquote >= 0:
         if rquote == lquote:
             # find() and rfind() hit the same position: there is only a
             # single, unmatched quote character on the line
             raise ConfigError(
                 "Explanation string for taboo word should be enclosed in double quotes"
             )
         # Obtain explanation from within quotes
         explanation = s[lquote + 1:rquote].strip()
         s = s[:lquote].rstrip()
     else:
         # No explanation
         explanation = ""
     tokens = s.lower().split()
     # An empty token list covers both an empty and a whitespace-only line
     if not tokens or len(tokens) > 2:
         raise ConfigError(
             "Expected taboo word and a suggested replacement")
     taboo = tokens[0]
     replacement = tokens[1] if len(tokens) == 2 else taboo
     # Check all replacement words, which are separated by slashes '/'
     if any(r.count("_") != 1 for r in replacement.split("/")):
         raise ConfigError(
             "Suggested replacement(s) should include a word category (_xx)"
         )
     TabooWords.add(taboo, replacement, explanation)
Пример #4
0
 def _handle_suggestions(s: str) -> None:
     """Register a bad word and its suggested replacements.

     The line is lowercased and split on whitespace: the first token is
     the bad word, all following tokens are replacements. Each replacement
     must contain exactly one underscore (word category suffix).

     Raises:
         ConfigError: if there is no replacement, or if a replacement
             does not contain exactly one underscore.
     """
     tokens = s.lower().split()
     if len(tokens) < 2:
         raise ConfigError(
             "Expected bad word and at least one suggested replacement")
     bad_word, *replacements = tokens
     for candidate in replacements:
         if candidate.count("_") != 1:
             raise ConfigError(
                 "Suggested replacements should include word category (_xx)")
     Suggestions.add(bad_word, replacements)
Пример #5
0
 def _handle_taboo_words(s):
     """Register a taboo word together with its suggested replacement.

     The line is lowercased and must consist of exactly two
     whitespace-separated tokens; the second one must contain exactly
     one underscore (word category suffix).

     Raises:
         ConfigError: if the token count is not two or the replacement
             does not contain exactly one underscore.
     """
     tokens = s.lower().split()
     if len(tokens) != 2:
         raise ConfigError(
             "Expected taboo word and a suggested replacement")
     taboo, replacement = tokens
     if replacement.count("_") != 1:
         raise ConfigError(
             "Suggested replacement should include word category (_xx)")
     TabooWords.add(taboo, replacement)
Пример #6
0
 def _handle_allowed_multiples(s: str) -> None:
     """Register a word from the allowed_multiples section.

     Raises:
         ConfigError: if the line holds anything other than exactly one
             word, or if the word is already in AllowedMultiples.SET.
     """
     assert s
     word_count = len(s.split())
     if word_count != 1:
         raise ConfigError(
             "Only one word per line allowed in allowed_multiples section")
     already_known = s in AllowedMultiples.SET
     if already_known:
         raise ConfigError(
             "'{0}' is repeated in allowed_multiples section".format(s))
     AllowedMultiples.add(s)
Пример #7
0
 def add(word: str, replacement: str, explanation: str) -> None:
     """Store a taboo word with its replacement and explanation.

     The word may carry a suffix after an underscore; when present, at
     least one of the meanings returned by the database lookup must have
     that value as its ``ordfl`` attribute.

     Raises:
         ConfigError: if the word is already defined, or if the lookup
             yields no (matching) meaning.
     """
     if word in TabooWords.DICT:
         raise ConfigError(
             "Multiple definition of '{0}' in taboo_words section".format(
                 word))
     db = GreynirBin.get_db()
     stem, *suffixes = word.split("_")
     _, meanings = db.lookup_g(stem)
     found = bool(meanings)
     if found and suffixes:
         # A category was given: require at least one meaning in that category
         found = any(mm.ordfl == suffixes[0] for mm in meanings)
     if not found:
         raise ConfigError(
             "The taboo word '{0}' is not found in BÍN".format(word))
     TabooWords.DICT[word] = (replacement, explanation)
Пример #8
0
    def read(fname: str) -> None:
        """Read the configuration file and dispatch its lines to section handlers.

        Does nothing if the settings have already been loaded. Everything
        after a '#' on a line is treated as a comment. A line of the form
        ``[name]`` selects the handler for the following lines.

        Raises:
            ConfigError: on an unknown section name, on a config line that
                precedes any section header, or when a handler rejects a
                line. The file name and line number are set on the error.
        """
        with Settings._lock:

            if Settings.loaded:
                return

            handlers = {
                "settings": Settings._handle_settings,
                "undeclinable_adjectives": Settings._handle_undeclinable_adjectives,
                "noindex_words": Settings._handle_noindex_words,
            }
            current = None  # Handler of the section being read

            reader = None
            try:
                reader = LineReader(fname)
                for raw in reader.lines():
                    # Drop the comment part, if any, and surrounding whitespace
                    line = raw.partition("#")[0].strip()
                    if not line:
                        # Nothing left on the line: skip it
                        continue
                    if line.startswith("[") and line.endswith("]"):
                        # Section header
                        section = line[1:-1].strip().lower()
                        if section not in handlers:
                            raise ConfigError("Unknown section name '{0}'".format(section))
                        current = handlers[section]
                        continue
                    if current is None:
                        raise ConfigError("No handler for config line '{0}'".format(line))
                    # Let the handler of the current section process the line
                    try:
                        current(line)
                    except ConfigError as e:
                        # Record where in the file the error occurred
                        e.set_pos(reader.fname(), reader.line())
                        raise e

            except ConfigError as e:
                # Record where in the file the error occurred,
                # provided that the reader could be created
                if reader:
                    e.set_pos(reader.fname(), reader.line())
                raise e

            Settings.loaded = True
Пример #9
0
 def _handle_settings(s: str) -> None:
     """Handle one ``parameter = value`` line of the settings section.

     The line is lowercased before parsing. The only recognized parameter
     is 'debug', which sets Settings.DEBUG to whether the value is a
     member of TRUE.

     Raises:
         ConfigError: if the line contains no '=', if the parameter is
             unknown, or if a ValueError occurs while applying the value.
     """
     par, sep, val = s.lower().partition("=")
     if not sep:
         # Without this check a line lacking '=' would fail with an IndexError
         raise ConfigError(
             "Expected 'parameter = value' but got '{0}'".format(s))
     par = par.strip()
     val = val.strip()
     try:
         if par == "debug":
             Settings.DEBUG = val in TRUE
         else:
             raise ConfigError(
                 "Unknown configuration parameter '{0}'".format(par))
     except ValueError:
         raise ConfigError("Invalid parameter value: {0} = {1}".format(
             par, val))
Пример #10
0
 def add(word: str, parts: Tuple[str, ...]) -> None:
     """Map a wrongly compounded word to the tuple of its parts.

     Raises:
         ConfigError: if the word already has an entry in WrongCompounds.DICT.
     """
     registry = WrongCompounds.DICT
     if word in registry:
         message = "Multiple definition of '{0}' in wrong_compounds section"
         raise ConfigError(message.format(word))
     assert isinstance(parts, tuple)
     registry[word] = parts
Пример #11
0
 def add(first_part: str, second_part_stem: str) -> None:
     """Record that second_part_stem may follow first_part as a split compound.

     Raises:
         ConfigError: if this exact combination has been added before.
     """
     table = SplitCompounds.DICT
     is_duplicate = first_part in table and second_part_stem in table[first_part]
     if is_duplicate:
         combined = first_part + " " + second_part_stem
         raise ConfigError(
             "Multiple definition of '{0}' in split_compounds section".
             format(combined))
     table[first_part].add(second_part_stem)
Пример #12
0
 def _handle_split_compounds(s: str) -> None:
     """Register a two-part entry from the split_compounds section.

     Raises:
         ConfigError: if the line does not consist of exactly two
             whitespace-separated parts.
     """
     pieces = s.split()
     if len(pieces) == 2:
         first, second = pieces
         SplitCompounds.add(first, second)
         return
     raise ConfigError(
         "Missing word part(s) in split_compounds section")
Пример #13
0
 def _handle_ritmyndir(s: str) -> None:
     """Handle one line of data from Ritmyndir in Stórasnið in BÍN/DIM.

     Expected format (semicolon-separated, thirteen fields)::

         lemma;id;cat;wrong_word_form;correct_word_form;tag;eink;
         malsnid;stafs;aslatt;beyg;age;ref

     Entries from very old references are skipped, as are entries where
     the wrong and correct forms differ at most in capitalization.

     Raises:
         ConfigError: if the field count is wrong, or if the id or eink
             field is not an integer.
     """
     fields = [field.strip() for field in s.split(";")]
     if len(fields) != 13:
         raise ConfigError(
             "Expected lemma, id, cat, wrong_word_form, correct_word_form, tag, eink, malsnid, stafs, aslatt, beyg, age, ref"
         )
     (lemma, bin_id, cat, wrong_form, correct_form, tag, eink,
      malsnid, stafs, aslatt, beyg, _age, ref) = fields
     if "SAGA" in ref or ref in {"MILTON", "HALLGP-4", "KLIM", "ONP"}:
         # Skipping errors from very old references, don't represent errors in Modern Icelandic
         return
     if wrong_form.lower() == correct_form.lower():
         # The forms are identical, or differ in capitalization only
         # TODO Skipping capitalization errors for now
         return
     try:
         id_num = int(bin_id)
         eink_num = int(eink)
     except ValueError as err:
         # Report malformed numbers as a ConfigError so that the caller
         # can attach file name and line number information to it
         raise ConfigError(
             "Expected integer id and eink but got '{0}' and '{1}'".format(
                 bin_id, eink)) from err
     # (lemma, id, cat, correct_word_form, tag, eink, malsnid, stafs, aslatt, beyg)
     meaning: RitmyndirTuple = (
         lemma,
         id_num,
         cat,
         correct_form,
         tag,
         eink_num,
         malsnid,
         stafs,
         aslatt,
         beyg,
     )
     Ritmyndir.add(wrong_form, meaning)
Пример #14
0
 def add(morph: str, boundlist: List[str], freelist: List[str]) -> None:
     """Store the bound and free lists for a morpheme.

     Raises:
         ConfigError: if boundlist is empty. The freelist may be empty.
     """
     if boundlist:
         Morphemes.BOUND_DICT[morph] = boundlist
         Morphemes.FREE_DICT[morph] = freelist
         return
     raise ConfigError(
         "A definition of allowed PoS is necessary with morphemes")
Пример #15
0
 def _handle_multiword_errors(s: str) -> None:
     """Register a multiword phrase and its parenthesized error specification.

     Format: ``word word [word...] $error(...)``. The line is lowercased
     before it is parsed.

     Raises:
         ConfigError: if '$error' is missing, the phrase has fewer than two
             words, or the specification is shorter than three characters
             or not enclosed in parentheses.
     """
     head, marker, tail = s.lower().partition("$error")
     if not marker:
         raise ConfigError("Expected phrase followed by $error(...)")
     phrase = tuple(head.split())
     if len(phrase) < 2:
         raise ConfigError(
             "Multiword phrase must contain at least two words")
     spec = tail.strip()
     if len(spec) < 3:
         raise ConfigError(
             "Incomplete error specification for multiword phrase")
     if not (spec.startswith("(") and spec.endswith(")")):
         raise ConfigError(
             "Error specification should be enclosed in parentheses")
     # Pass on the specification without its enclosing parentheses
     MultiwordErrors.add(phrase, spec[1:-1])
Пример #16
0
 def _handle_undeclinable_adjectives(s: str) -> None:
     """Register one lowercased, purely alphabetic undeclinable adjective.

     Raises:
         ConfigError: if the line, once lowercased and stripped, is not
             purely alphabetic.
     """
     adjective = s.lower().strip()
     if adjective.isalpha():
         UndeclinableAdjectives.add(adjective)
         return
     raise ConfigError(
         "Expected word but got '{0}' in undeclinable_adjectives".format(adjective)
     )
Пример #17
0
 def _handle_unique_errors(s: str) -> None:
     """Handle one line of the unique_errors section.

     Format: ``"error_word", "correction"``. Both parts must be enclosed
     in double quotes. The line is lowercased before parsing. The error
     word must be a single token; the correction is stored as a tuple of
     its whitespace-separated tokens.

     Raises:
         ConfigError: if the comma is missing, if either part is too short
             or not quoted, or if the error word is blank or consists of
             more than one token.
     """
     raw_word, comma, raw_corr = s.lower().partition(",")
     if not comma:
         raise ConfigError(
             "Expected comma between error word and its correction")
     word = raw_word.strip()
     # Two quote characters plus at least one character in between
     if len(word) < 3:
         raise ConfigError(
             "Expected nonempty word before comma in unique_errors section")
     if word[0] != '"' or word[-1] != '"':
         raise ConfigError(
             "Expected word in double quotes in unique_errors section")
     word = word[1:-1]
     corr = raw_corr.strip()
     if len(corr) < 3:
         raise ConfigError(
             "Expected nonempty word after comma in unique_errors section")
     if corr[0] != '"' or corr[-1] != '"':
         raise ConfigError(
             "Expected word in double quotes after comma in unique_errors section"
         )
     corr = corr[1:-1]
     if not word.strip():
         # Only whitespace between the quotes: report it as a missing word
         # rather than as a multi-word entry
         raise ConfigError(
             "Expected word before the comma in unique_errors section")
     if len(word.split()) != 1:
         raise ConfigError(
             "Multiple words not allowed before the comma in unique_errors section"
         )
     UniqueErrors.add(word, tuple(corr.split()))
Пример #18
0
 def _handle_wrong_compounds(s: str) -> None:
     """Register a wrongly compounded word and the parts it should be split into.

     Format: ``word, part part [part...]``; double quotes around either
     side are stripped. The line is lowercased before parsing.

     Raises:
         ConfigError: if the comma is missing, the word is empty, fewer
             than two parts are given, or the word has more than one token.
     """
     left, comma, right = s.lower().partition(",")
     if not comma:
         raise ConfigError(
             "Expected comma between compound word and its parts")
     word = left.strip().strip('"')
     parts = tuple(right.strip().strip('"').split())
     if not word:
         raise ConfigError(
             "Expected word before the comma in wrong_compounds section")
     if len(parts) < 2:
         raise ConfigError(
             "Missing word part(s) in wrong_compounds section")
     if len(word.split()) != 1:
         raise ConfigError(
             "Multiple words not allowed before comma in wrong_compounds section"
         )
     WrongCompounds.add(word, parts)
Пример #19
0
 def _handle_morphemes(s: str) -> None:
     """Process one line of the [morphemes] section.

     The first token is the morpheme; each following token is an
     attachment specification. A '+' prefix puts the rest of the token on
     the bound list, a '-' prefix puts it on the free list.

     Raises:
         ConfigError: if fewer than two tokens are given, or if a
             specification starts with neither '+' nor '-'.
     """
     tokens = s.split()
     if len(tokens) < 2:
         raise ConfigError(
             "Expected at least a prefix and an attachment specification")
     morph, specs = tokens[0], tokens[1:]
     boundlist = []
     freelist = []
     # Select the target list by the leading sign of each specification
     targets = {"+": boundlist, "-": freelist}
     for spec in specs:
         target = targets.get(spec[0])
         if target is None:
             raise ConfigError(
                 "Attachment specification should start with '+' or '-'"
             )
         target.append(spec[1:])
     Morphemes.add(morph, boundlist, freelist)
Пример #20
0
 def _handle_settings(s):
     """Handle one ``parameter = value`` line of the settings section.

     The line is lowercased before parsing. The values 'none', 'true' and
     'false' are converted to None, True and False. The only recognized
     parameter is 'debug'.

     Raises:
         ConfigError: if the line contains no '=', if the parameter is
             unknown, or if a ValueError occurs while applying the value.
     """
     par, sep, val = s.lower().partition("=")
     if not sep:
         # Without this check a line lacking '=' would fail with an IndexError
         raise ConfigError(
             "Expected 'parameter = value' but got '{0}'".format(s))
     par = par.strip()
     val = val.strip()
     # The value is already lowercase at this point
     if val == "none":
         val = None
     elif val == "true":
         val = True
     elif val == "false":
         val = False
     try:
         if par == "debug":
             # NOTE(review): val may be None/True/False here rather than a
             # string; if TRUE holds strings only, 'debug = true' yields
             # False - confirm against the definition of TRUE
             Settings.DEBUG = val in TRUE
         else:
             raise ConfigError(
                 "Unknown configuration parameter '{0}'".format(par))
     except ValueError:
         raise ConfigError("Invalid parameter value: {0} = {1}".format(
             par, val))
Пример #21
0
 def add(word: str) -> None:
     """Add the given (wrongly capitalized) word stem to the stem set.

     Multi-word entries such as 'félags- og barnamálaráðherra' are
     supported: only the last word is looked up, and everything before it
     is kept as a prefix. The stored word is built from the stem of the
     first lemma found for the case-reversed last word.

     Raises:
         ConfigError: if no lemma is found for the case-reversed last
             word, or if the resulting word is already in the set.
     """
     head, space, tail = word.rpartition(" ")
     if space:
         # Multi-word entry: look up the last word only
         prefix, suffix = head + " ", tail
         split_on_hyphen = False
     else:
         prefix, suffix = "", word
         # True for e.g. 'norður-kórea' and 'nýja-sjáland'
         split_on_hyphen = "-" in word
     db = GreynirBin().get_db()
     # The suffix may not be in BÍN except as a compound, and in that
     # case we want its hyphenated lemma
     suffix_rev = CapitalizationErrors.reverse_capitalization(
         suffix, split_on_hyphen=split_on_hyphen)
     _, meanings = db.lookup_g(suffix_rev)
     # Keep only the entries whose word form equals their stem, i.e. lemmas
     lemmas = [mm for mm in meanings if mm.stofn == mm.ordmynd]
     if not lemmas:
         raise ConfigError(
             "No BÍN meaning for '{0}' (from error word '{1}') in capitalization_errors section"
             .format(suffix_rev, word))
     stem = lemmas[0].stofn
     if prefix:
         # E.g. 'félags- og barnamálaráðherra', which comes out
         # with a lemma of 'félags- og barnamála-ráðherra'
         word = prefix + stem
     else:
         # E.g. 'barnamálaráðherra', which comes out
         # with a lemma of 'barnamála-ráðherra'
         word = CapitalizationErrors.emulate_case(stem, template=word)
     if word in CapitalizationErrors.SET:
         raise ConfigError(
             "Multiple definition of '{0}' in capitalization_errors section"
             .format(word))
     # Store the word along with its case-reversed counterpart
     word_rev = CapitalizationErrors.reverse_capitalization(
         word, split_on_hyphen=split_on_hyphen)
     CapitalizationErrors.SET.add(word)
     CapitalizationErrors.SET_REV.add(word_rev)
Пример #22
0
 def add(word: str) -> None:
     """Add the given (wrongly capitalized) word stem to the stem set.

     The case-reversed form goes into SET_REV: the title-cased form for
     a lowercase word, the lowercase form for a title-cased one.

     Raises:
         ConfigError: if the word is already in the set.
     """
     known = CapitalizationErrors.SET
     if word in known:
         raise ConfigError(
             "Multiple definition of '{0}' in capitalization_errors section"
             .format(word))
     known.add(word)
     if word.islower():
         reversed_form = word.title()
     else:
         assert word.istitle()
         reversed_form = word.lower()
     CapitalizationErrors.SET_REV.add(reversed_form)
Пример #23
0
    def add(words: Tuple[str, ...], error: str) -> None:
        """Register a multiword phrase and its error specification.

        The specification must consist of two comma-separated parameters:
        an error code and a replacement, the latter being split into
        whitespace-separated tokens.

        Raises:
            ConfigError: if the phrase is already defined, or if the
                specification does not have exactly two parameters.
        """
        if words in MultiwordErrors.ERROR_DICT:
            raise ConfigError(
                "Multiple definition of '{0}' in multiword_errors section".
                format(" ".join(words)))

        # Validate the specification before touching any of the registries,
        # so that a rejected entry leaves no partial state behind
        params = error.split(",")
        if len(params) != 2:
            raise ConfigError(
                "Expected two comma-separated parameters within $error()")
        code = params[0].strip()
        replacement = params[1].strip().split()

        MultiwordErrors.ERROR_DICT[words] = error

        # Index of the phrase within the phrase list
        ix = len(MultiwordErrors.LIST)

        # Append the phrase and the error specification in tuple form
        MultiwordErrors.LIST.append((words, code, replacement))

        # Dictionary structure: dict { firstword: [ (restword_list, phrase_index) ] }
        MultiwordErrors.DICT[words[0]].append((list(words[1:]), ix))
Пример #24
0
 def _handle_noindex_words(s: str) -> None:
     """Handle one line of the noindex_words section.

     A line of the form ``category = cat`` sets the current category via
     NoIndexWords.set_cat(); a line without '=' is added as a word via
     NoIndexWords.add(). The line is lowercased before parsing.

     Raises:
         ConfigError: if a setting other than 'category' is assigned.
     """
     key, equals, value = s.lower().partition("=")
     key = key.strip()
     if not equals:
         # No assignment on the line: it is a plain word entry
         NoIndexWords.add(key)
         return
     if key != "category":
         raise ConfigError("Unknown setting '{0}' in noindex_words".format(key))
     NoIndexWords.set_cat(value.strip())
Пример #25
0
 def _handle_settings(s: str) -> None:
     """Handle one ``parameter = value`` line of the settings section.

     The line is lowercased before parsing. The values 'none', 'true' and
     'false' are converted to None, True and False before being applied.
     Host parameters are stored via str(), port parameters via int()
     (with a missing value counting as 0), and 'debug' via bool().
     The deprecated 'bin_db_hostname' and 'bin_db_port' parameters are
     accepted and ignored.

     Raises:
         ConfigError: if the line contains no '=', if the parameter is
             unknown, or if a value cannot be converted.
     """
     par, sep, sval = s.lower().partition("=")
     if not sep:
         # Without this check a line lacking '=' would fail with an IndexError
         raise ConfigError(
             "Expected 'parameter = value' but got '{0}'".format(s))
     par = par.strip()
     sval = sval.strip()
     val: Union[None, str, bool] = sval
     # The value is already lowercase at this point
     if sval == "none":
         val = None
     elif sval == "true":
         val = True
     elif sval == "false":
         val = False
     try:
         if par == "db_hostname":
             Settings.DB_HOSTNAME = str(val)
         elif par == "db_port":
             Settings.DB_PORT = int(val or 0)
         elif par == "bin_db_hostname":
             # This is no longer required and has been deprecated
             pass
         elif par == "bin_db_port":
             # This is no longer required and has been deprecated
             pass
         elif par == "host":
             Settings.HOST = str(val)
         elif par == "port":
             Settings.PORT = int(val or 0)
         elif par == "simserver_host":
             Settings.SIMSERVER_HOST = str(val)
         elif par == "simserver_port":
             Settings.SIMSERVER_PORT = int(val or 0)
         elif par == "debug":
             Settings.DEBUG = bool(val)
         else:
             raise ConfigError("Unknown configuration parameter '{0}'".format(par))
     except ValueError:
         raise ConfigError("Invalid parameter value: {0}={1}".format(par, val))
Пример #26
0
 def reverse_capitalization(word: str,
                            *,
                            split_on_hyphen: bool = False) -> str:
     """Return the word with its capitalization reversed (lower <-> upper case).

     A lowercase word and a multi-letter all-uppercase word both come out
     capitalized; a capitalized word comes out lowercase. If
     split_on_hyphen is set and the word contains hyphens, each
     hyphen-separated part is reversed on its own.

     Raises:
         ConfigError: if the word matches none of the patterns above.
     """
     if split_on_hyphen and "-" in word:
         # 'norður-kórea' -> 'Norður-Kórea'
         reversed_parts = [
             CapitalizationErrors.reverse_capitalization(part)
             for part in word.split("-")
         ]
         return "-".join(reversed_parts)
     is_acronym = word.isupper() and len(word) > 1
     if word.islower() or is_acronym:
         # Lowercase word or multi-letter uppercase acronym
         return word.capitalize()
     if word[0].isupper() and word[1:].islower():
         # Capitalized word
         return word.lower()
     raise ConfigError(
         "'{0}' cannot have mixed capitalization".format(word))
Пример #27
0
 def _handle_ow_forms(s: str) -> None:
     """Handle one line of the ow_forms section.

     Expected format (semicolon-separated, six fields)::

         wrong form;lemma;correct form;id;category;tag

     Entries whose wrong form equals the correct form are silently skipped.

     Raises:
         ConfigError: if the field count is wrong or the id is not
             an integer.
     """
     fields = [field.strip() for field in s.split(";")]
     if len(fields) != 6:
         raise ConfigError(
             "Expected wrong form;lemma;correct form;id;category;tag")
     wrong_form, lemma, correct_form, utg, category, tag = fields
     if wrong_form == correct_form:
         # !!! TODO: This should raise a ConfigError instead of being skipped
         return
     try:
         utg_id = int(utg)
     except ValueError as err:
         # Report a malformed id as a ConfigError so that the caller can
         # attach file name and line number information to it
         raise ConfigError(
             "Expected integer id but got '{0}' for '{1}'".format(
                 utg, wrong_form)) from err
     meaning: ErrorFormTuple = (
         lemma,  # Lemma (stofn)
         correct_form,  # Correct form (ordmynd)
         utg_id,  # Id (utg)
         category,  # Category (ordfl)
         tag,  # Tag (beyging)
     )
     OwForms.add(wrong_form, meaning)
Пример #28
0
class Settings:

    """Global settings.

    Defaults are read from environment variables when the class body is
    executed; invalid numeric values raise ConfigError at that point.
    Values from the configuration file are applied by read().
    """

    # Guards read() so that the configuration file is only loaded once
    _lock = threading.Lock()
    loaded = False

    # Postgres SQL database server hostname and port
    DB_HOSTNAME = os.environ.get("GREYNIR_DB_HOST", "localhost")
    DB_PORT_STR = os.environ.get("GREYNIR_DB_PORT", "5432")  # Default PostgreSQL port
    DB_USERNAME = os.environ.get("GREYNIR_DB_USERNAME", "reynir")
    DB_PASSWORD = os.environ.get("GREYNIR_DB_PASSWORD", "reynir")

    try:
        DB_PORT = int(DB_PORT_STR)
    except ValueError:
        # Name the environment variable that was actually read
        raise ConfigError(
            "Invalid environment variable value: GREYNIR_DB_PORT={0}".format(
                DB_PORT_STR
            )
        )

    # Flask server host and port
    HOST = os.environ.get("GREYNIR_HOST", "localhost")
    PORT_STR = os.environ.get("GREYNIR_PORT", "5000")
    try:
        PORT = int(PORT_STR)
    except ValueError:
        raise ConfigError(
            "Invalid environment variable value: GREYNIR_PORT={0}".format(PORT_STR)
        )

    # Flask debug parameter
    DEBUG = False

    # Similarity server
    SIMSERVER_HOST = os.environ.get("SIMSERVER_HOST", "localhost")
    SIMSERVER_PORT_STR = os.environ.get("SIMSERVER_PORT", "5001")
    try:
        SIMSERVER_PORT = int(SIMSERVER_PORT_STR)
    except ValueError:
        raise ConfigError(
            "Invalid environment variable value: SIMSERVER_PORT={0}".format(
                SIMSERVER_PORT_STR
            )
        )

    # This check applies to the environment defaults only; ports set later
    # from the configuration file are not compared
    if SIMSERVER_PORT == PORT:
        raise ConfigError(
            "Can't run both main server and "
            "similarity server on port {0}".format(PORT)
        )

    # The flag is given as an integer in the environment (0 means disabled)
    NN_PARSING_ENABLED = os.environ.get("NN_PARSING_ENABLED", False)
    try:
        NN_PARSING_ENABLED = bool(int(NN_PARSING_ENABLED))
    except ValueError:
        raise ConfigError(
            "Invalid environment variable value: NN_PARSING_ENABLED = {0}".format(
                NN_PARSING_ENABLED
            )
        )
    NN_PARSING_HOST = os.environ.get("NN_PARSING_HOST", "localhost")
    NN_PARSING_PORT_STR = os.environ.get("NN_PARSING_PORT", "9000")
    try:
        NN_PARSING_PORT = int(NN_PARSING_PORT_STR)
    except ValueError:
        raise ConfigError(
            "Invalid environment variable value: NN_PARSING_PORT = {0}".format(
                NN_PARSING_PORT_STR
            )
        )

    # The flag is given as an integer in the environment (0 means disabled)
    NN_TRANSLATION_ENABLED = os.environ.get("NN_TRANSLATION_ENABLED", False)
    try:
        NN_TRANSLATION_ENABLED = bool(int(NN_TRANSLATION_ENABLED))
    except ValueError:
        raise ConfigError(
            "Invalid environment variable value: NN_TRANSLATION_ENABLED = {0}".format(
                NN_TRANSLATION_ENABLED
            )
        )
    NN_TRANSLATION_HOST = os.environ.get("NN_TRANSLATION_HOST", "localhost")
    NN_TRANSLATION_PORT_STR = os.environ.get("NN_TRANSLATION_PORT", "9001")
    try:
        NN_TRANSLATION_PORT = int(NN_TRANSLATION_PORT_STR)
    except ValueError:
        raise ConfigError(
            "Invalid environment variable value: NN_TRANSLATION_PORT = {0}".format(
                NN_TRANSLATION_PORT_STR
            )
        )

    # Configuration settings from the Greynir.conf file

    @staticmethod
    def _handle_settings(s: str) -> None:
        """Handle one ``parameter = value`` line of the settings section.

        The line is lowercased before parsing. The values 'none', 'true'
        and 'false' are converted to None, True and False before being
        applied. Host parameters are stored via str(), port parameters via
        int() (with a missing value counting as 0), and 'debug' via bool().
        The deprecated 'bin_db_hostname' and 'bin_db_port' parameters are
        accepted and ignored.

        Raises:
            ConfigError: if the line contains no '=', if the parameter is
                unknown, or if a value cannot be converted.
        """
        par, sep, sval = s.lower().partition("=")
        if not sep:
            # Without this check a line lacking '=' would fail with an IndexError
            raise ConfigError(
                "Expected 'parameter = value' but got '{0}'".format(s)
            )
        par = par.strip()
        sval = sval.strip()
        val: Union[None, str, bool] = sval
        # The value is already lowercase at this point
        if sval == "none":
            val = None
        elif sval == "true":
            val = True
        elif sval == "false":
            val = False
        try:
            if par == "db_hostname":
                Settings.DB_HOSTNAME = str(val)
            elif par == "db_port":
                Settings.DB_PORT = int(val or 0)
            elif par == "bin_db_hostname":
                # This is no longer required and has been deprecated
                pass
            elif par == "bin_db_port":
                # This is no longer required and has been deprecated
                pass
            elif par == "host":
                Settings.HOST = str(val)
            elif par == "port":
                Settings.PORT = int(val or 0)
            elif par == "simserver_host":
                Settings.SIMSERVER_HOST = str(val)
            elif par == "simserver_port":
                Settings.SIMSERVER_PORT = int(val or 0)
            elif par == "debug":
                Settings.DEBUG = bool(val)
            else:
                raise ConfigError("Unknown configuration parameter '{0}'".format(par))
        except ValueError:
            raise ConfigError("Invalid parameter value: {0}={1}".format(par, val))

    @staticmethod
    def _handle_undeclinable_adjectives(s: str) -> None:
        """Register one lowercased, purely alphabetic undeclinable adjective.

        Raises:
            ConfigError: if the line, once lowercased and stripped, is not
                purely alphabetic.
        """
        s = s.lower().strip()
        if not s.isalpha():
            raise ConfigError(
                "Expected word but got '{0}' in undeclinable_adjectives".format(s)
            )
        UndeclinableAdjectives.add(s)

    @staticmethod
    def _handle_noindex_words(s: str) -> None:
        """Handle one line of the noindex_words section.

        A line of the form ``category = cat`` sets the current category;
        a line without '=' is added as a word. The line is lowercased
        before parsing.

        Raises:
            ConfigError: if a setting other than 'category' is assigned.
        """
        # Format: category = [cat] followed by word stem list
        a = s.lower().split("=", maxsplit=1)
        par = a[0].strip()
        if len(a) == 2:
            val = a[1].strip()
            if par == "category":
                NoIndexWords.set_cat(val)
            else:
                raise ConfigError("Unknown setting '{0}' in noindex_words".format(par))
            return
        assert len(a) == 1
        NoIndexWords.add(par)

    @staticmethod
    def read(fname: str) -> None:
        """Read the configuration file and dispatch its lines to section handlers.

        Does nothing if the settings have already been loaded. Everything
        after a '#' on a line is treated as a comment. A line of the form
        ``[name]`` selects the handler for the following lines.

        Raises:
            ConfigError: on an unknown section name, on a config line that
                precedes any section header, or when a handler rejects a
                line. The file name and line number are set on the error.
        """

        with Settings._lock:

            if Settings.loaded:
                return

            CONFIG_HANDLERS = {
                "settings": Settings._handle_settings,
                "undeclinable_adjectives": Settings._handle_undeclinable_adjectives,
                "noindex_words": Settings._handle_noindex_words,
            }
            handler = None  # Current section handler

            rdr = None
            try:
                rdr = LineReader(fname)
                for s in rdr.lines():
                    # Ignore comments
                    ix = s.find("#")
                    if ix >= 0:
                        s = s[0:ix]
                    s = s.strip()
                    if not s:
                        # Blank line: ignore
                        continue
                    if s[0] == "[" and s[-1] == "]":
                        # New section
                        section = s[1:-1].strip().lower()
                        if section in CONFIG_HANDLERS:
                            handler = CONFIG_HANDLERS[section]
                            continue
                        raise ConfigError("Unknown section name '{0}'".format(section))
                    if handler is None:
                        raise ConfigError("No handler for config line '{0}'".format(s))
                    # Call the correct handler depending on the section
                    try:
                        handler(s)
                    except ConfigError as e:
                        # Add file name and line number information to the exception
                        # if it's not already there
                        e.set_pos(rdr.fname(), rdr.line())
                        raise e

            except ConfigError as e:
                # Add file name and line number information to the exception
                # if it's not already there
                if rdr:
                    e.set_pos(rdr.fname(), rdr.line())
                raise e

            Settings.loaded = True
Пример #29
0
 def add(word: str, replacements: List[str]) -> None:
     """Store the list of suggested replacements for a word.

     Raises:
         ConfigError: if the word already has an entry in Suggestions.DICT.
     """
     table = Suggestions.DICT
     if word not in table:
         table[word] = replacements
         return
     raise ConfigError(
         "Multiple definition of '{0}' in suggestions section".format(
             word))
Пример #30
0
    def read(fname: str) -> None:
        """Read the configuration file and dispatch its lines to section handlers.

        Does nothing if the settings are flagged as loaded, or if
        UniqueErrors.DICT or AllowedMultiples.SET already hold data.
        Everything after a '#' on a line is treated as a comment. A line of
        the form ``[name]`` selects the handler for the following lines.

        Raises:
            ConfigError: on an unknown section name, on a config line that
                precedes any section header, or when a handler rejects a
                line. The file name and line number are set on the error.
        """
        with Settings._lock:

            if Settings.loaded or UniqueErrors.DICT or AllowedMultiples.SET:
                return

            handlers = {
                "settings": Settings._handle_settings,
                "allowed_multiples": Settings._handle_allowed_multiples,
                "wrong_compounds": Settings._handle_wrong_compounds,
                "split_compounds": Settings._handle_split_compounds,
                "unique_errors": Settings._handle_unique_errors,
                "capitalization_errors":
                Settings._handle_capitalization_errors,
                "taboo_words": Settings._handle_taboo_words,
                "suggestions": Settings._handle_suggestions,
                "multiword_errors": Settings._handle_multiword_errors,
                "morphemes": Settings._handle_morphemes,
                "ow_forms": Settings._handle_ow_forms,
                "error_forms": Settings._handle_error_forms,
                "auto_ow": Settings._handle_ow_forms,
                "auto_error": Settings._handle_error_forms,
                "iec_nonwords": Settings._handle_iec_nonwords,
                "icesquer": Settings._handle_icesquer,
                "ritmyndir": Settings._handle_ritmyndir,
                "ritmyndir_details": Settings._handle_ritmyndir_details,
            }
            current = None  # Handler of the section being read

            reader = None
            try:
                reader = LineReader(fname, package_name=__name__)
                for raw in reader.lines():
                    # Drop the comment part, if any, and surrounding whitespace
                    line = raw.partition("#")[0].strip()
                    if not line:
                        # Nothing left on the line: skip it
                        continue
                    if line.startswith("[") and line.endswith("]"):
                        # Section header
                        section = line[1:-1].strip().lower()
                        if section not in handlers:
                            raise ConfigError(
                                "Unknown section name '{0}'".format(section))
                        current = handlers[section]
                        continue
                    if current is None:
                        raise ConfigError(
                            "No handler for config line '{0}'".format(line))
                    # Let the handler of the current section process the line
                    try:
                        current(line)
                    except ConfigError as e:
                        # Record where in the file the error occurred
                        e.set_pos(reader.fname(), reader.line())
                        raise e

            except ConfigError as e:
                # Record where in the file the error occurred,
                # provided that the reader could be created
                if reader:
                    e.set_pos(reader.fname(), reader.line())
                raise e

            Settings.loaded = True