Exemplo n.º 1
0
def countGroupInRegex(sRegex):
    """Return the number of capturing groups of the regex <sRegex>.

    If the pattern cannot be compiled, the traceback and the pattern itself
    are displayed and 0 is returned.
    """
    try:
        return re.compile(sRegex).groups
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must not be
        # swallowed and reported as a regex error.
        traceback.print_exc()
        echo(sRegex)
    return 0
Exemplo n.º 2
0
def getServerOptions():
    """Return the [options] section of "server_options._global.ini".

    The section is returned as the parser's internal mapping of option names
    to raw string values. If the section cannot be obtained, an error message
    is displayed and the program exits.
    """
    # SafeConfigParser was only a deprecated alias of ConfigParser and has
    # been removed in Python 3.12.
    xConfig = configparser.ConfigParser()
    try:
        # read() silently ignores files it cannot open; in that case it is
        # the lookup of the missing section that fails.
        xConfig.read("server_options._global.ini")
        dOpt = xConfig._sections['options']
    except Exception:
        # Exception (not a bare except) so that Ctrl-C is not reported as a
        # configuration problem.
        echo(
            "Options file [server_options._global.ini] not found or not readable"
        )
        exit()
    return dOpt
Exemplo n.º 3
0
 def test_parse(self):
     """Run the grammar checker on every line of the French test files.

     For each test line, the errors found must equal the expected errors
     (marked with {{ }} in the line) and, when suggestions are given after
     "->>", the suggestions found must equal them too.
     Once a file has been processed without failure, the identifiers of the
     rules that were never tested are displayed.
     """
     zOptionTag = re.compile("^__([a-zA-Z0-9]+)__ ")
     bListUntested = False
     for sFileName in ["gc_test.txt"]:
         with self.subTest(msg=sFileName):
             with open("./tests/fr/" + sFileName, "r",
                       encoding="utf-8") as hSrc:
                 for sRawLine in hSrc:
                     # comment lines and blank lines are not tests
                     if sRawLine.startswith("#") or not sRawLine.strip():
                         continue
                     # the first 10 characters are reported as the line number
                     sLineNum = sRawLine[:10].strip()
                     sTest = sRawLine[10:].strip()
                     # optional leading tag: __option__
                     mTag = zOptionTag.search(sTest)
                     sOption = mTag.group(1) if mTag else None
                     if mTag:
                         sTest = sTest[mTag.end():]
                     # text to check and, optionally, expected suggestions
                     if "->>" not in sTest:
                         sErrorText = sTest.strip()
                         sWantedSuggs = ""
                     else:
                         sErrorText, sWantedSuggs = self._splitTestLine(sTest)
                         bQuoted = sWantedSuggs.startswith('"') \
                             and sWantedSuggs.endswith('"')
                         if bQuoted:
                             sWantedSuggs = sWantedSuggs[1:-1]
                     sExpectedErrors = self._getExpectedErrors(sErrorText)
                     sTextToCheck = sErrorText.replace("}}", "")
                     sTextToCheck = sTextToCheck.replace("{{", "")
                     sFoundErrors, sListErr, sFoundSuggs = \
                         self._getFoundErrors(sTextToCheck, sOption)
                     sErrorsReport = "".join([
                         "\n# Line num: ", sLineNum,
                         "\n> to check: ", _fuckBackslashUTF8(sTextToCheck),
                         "\n  expected: ", sExpectedErrors,
                         "\n  found:    ", sFoundErrors,
                         "\n  errors:   \n", sListErr,
                     ])
                     self.assertEqual(sExpectedErrors, sFoundErrors,
                                      sErrorsReport)
                     if sWantedSuggs:
                         sSuggsReport = "".join([
                             "\n# Line num: ", sLineNum,
                             "\n> to check: ",
                             _fuckBackslashUTF8(sTextToCheck),
                             "\n  errors:   \n", sListErr,
                         ])
                         self.assertEqual(sWantedSuggs, sFoundSuggs,
                                          sSuggsReport)
             # only reached when no assertion failed for this file
             bListUntested = True
     if bListUntested:
         nUntested = 0
         for sOpt, sLineId, sRuleId in gce.listRules():
             # rules already tested, and rules whose identifier is only a
             # line identifier (digits + s/p), are not listed
             if sLineId in self._aRuleTested or re.match(
                     "[0-9]+[sp]$", sRuleId):
                 continue
             echo(sRuleId, end=", ")
             nUntested += 1
         if nUntested:
             echo("\n[{} untested rules]".format(nUntested))
0
def regex2js(sRegex):
    """Convert a Python regex to a JavaScript regex.

    Returns a tuple (JS regex, list of negative lookbefore assertions); the
    second item is None when there is no such assertion.
    """
    # Characters used in place of \w -- Latin letters (http://unicode-table.com/fr/):
    #   0-9  and  _
    #   A-Z
    #   a-z
    #   À-Ö     00C0-00D6   (upper case)
    #   Ø-ß     00D8-00DF   (upper case)
    #   à-ö     00E0-00F6   (lower case)
    #   ø-ÿ     00F8-00FF   (lower case)
    #   Ā-ʯ     0100-02AF   (mixed)
    sWordChars = "a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ"

    # the case-insensitive flag is removed here and put back as a JS flag
    bCaseInsensitive = "(?i)" in sRegex
    if bCaseInsensitive:
        sRegex = sRegex.replace("(?i)", "")

    # the left word limit is removed and listed as a lookbefore assertion
    lLookBefore = []
    if WORDLIMITLEFT in sRegex:
        sRegex = sRegex.replace(WORDLIMITLEFT, "")
        lLookBefore.append("[" + sWordChars + ".,–-]$")

    # \w inside a character class first, then standalone \w
    sRegex = sRegex.replace("[\\w", "[" + sWordChars)
    sRegex = sRegex.replace("\\w", "[" + sWordChars + "]")
    sRegex = sRegex.replace("[.]", r"\.")
    if not sRegex.startswith("<js>"):
        sRegex = sRegex.replace("/", r"\/")

    # Negative lookbefore assertion should always be at the beginning of regex
    mLookBefore = re.search(r"\(\?<!([^()]+)\)", sRegex)
    if mLookBefore:
        lLookBefore.append(mLookBefore.group(1) + "$")
        sRegex = sRegex.replace(mLookBefore.group(0), "")
    if "(?<" in sRegex:
        echo("# Warning. Lookbefore assertion not changed in:\n  ")
        echo(sRegex)

    # delimiters and flags (checked again: the regex may have changed above)
    if sRegex.startswith("<js>"):
        sRegex = sRegex.replace('<js>', '/')
        sRegex = sRegex.replace('</js>i', '/ig')
        sRegex = sRegex.replace('</js>', '/g')
    else:
        sRegex = "/" + sRegex + "/g"
    if bCaseInsensitive and not sRegex.endswith("/ig"):
        sRegex = sRegex + "i"

    return (sRegex, lLookBefore or None)
Exemplo n.º 5
0
def getConfigOptions(sLang):
    """Return the grammar checking options of "server_options.<sLang>.ini".

    The values of the [gc_options] section are converted with bool(int(v)),
    so they are expected to be written as integers (0 / 1).
    Returns a dictionary {option name: bool}. On any error, a message is
    displayed and the program exits.
    """
    # SafeConfigParser was only a deprecated alias of ConfigParser and has
    # been removed in Python 3.12.
    xConfig = configparser.ConfigParser()
    try:
        # read() silently ignores files it cannot open; a missing file is
        # detected below, when the section is looked up.
        xConfig.read("server_options." + sLang + ".ini")
    except Exception:
        echo("Options file [server_options." + sLang +
             ".ini] not found or not readable")
        exit()
    try:
        dGCOpt = {
            k: bool(int(v))
            for k, v in xConfig._sections['gc_options'].items()
        }
    except Exception:
        # Exception (not a bare except) so that Ctrl-C is not reported as a
        # configuration problem.
        echo("Error in options file [server_options." + sLang +
             ".ini]. Dropped.")
        traceback.print_exc()
        exit()
    return dGCOpt
Exemplo n.º 6
0
def makePhonetTable(sp, bJS=False):
    """Generate the phonetic data module(s) from <sp>/data/phonet_simil.txt.

    Each non-empty, non-comment line of the source file is a set of
    homophonic words. The following data are written in
    <sp>/modules/phonet_data.py and, if <bJS> is True, in
    <sp>/modules-js/phonet_data.json:
        dWord:  {word: index of its set in lSet} (known words only)
        lSet:   list of sorted lists of words
        dMorph: {word: result of the dictionary's getMorph() for this word}

    If the dictionary cannot be loaded, the traceback is displayed and
    nothing is generated.
    """
    print("make phonet tables")

    try:
        oDict = ibdawg.IBDAWG("French.bdic")
    except Exception:
        # Exception (not a bare except) so that Ctrl-C is not swallowed
        traceback.print_exc()
        return

    # set of homophonic words
    with open(sp + "/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
        lSet = [
            sorted(sLine.split()) for sLine in hSrc
            if not sLine.startswith("#") and sLine.strip()
        ]

    # dictionary of words
    dWord = {}
    for i, aSet in enumerate(lSet):
        for sWord in aSet:
            if oDict.lookup(sWord):
                dWord[sWord] = i  # warning, what if word in several sets?
            else:
                echo("Mot inconnu : " + sWord)

    # dictionary of morphologies
    dMorph = {sWord: oDict.getMorph(sWord) for sWord in dWord}

    # write file for Python
    sCode = "# generated data (do not edit)\n\n" + \
            "dWord = " + str(dWord) + "\n\n" + \
            "lSet = " + str(lSet) + "\n\n" + \
            "dMorph = " + str(dMorph) + "\n"
    # the files are closed explicitly so that the data are flushed to disk
    # before the function returns
    with open(sp + "/modules/phonet_data.py", "w", encoding="utf-8") as hDst:
        hDst.write(sCode)

    if bJS:
        ## write file for JavaScript
        sCode = "{\n" + \
                '    "dWord": ' + json.dumps(dWord, ensure_ascii=False) + ",\n" + \
                '    "lSet": ' + json.dumps(lSet, ensure_ascii=False) + ",\n" + \
                '    "dMorph": ' + json.dumps(dMorph, ensure_ascii=False) + "\n}"
        with open(sp + "/modules-js/phonet_data.json", "w",
                  encoding="utf-8") as hDst:
            hDst.write(sCode)
Exemplo n.º 7
0
def createFirefoxExtension(sLang, dVars, bLaunchFx=False):
    """Build the Firefox extension for <sLang> in _build/xpi/<sLang>.

    The build folder is erased and filled again from gc_lang/<sLang>/xpi/ and
    grammalecte-js, the templates are filled, then "jpm xpi" is run in the
    build folder. If <bLaunchFx> is True, "jpm run -b nightly" is run too.
    The key 'optionsHTML' is added to <dVars>.
    """
    print("Building extension for Firefox")
    spBuild = "_build/xpi/" + sLang

    # fresh copy of the sources
    eraseFolder(spBuild)
    dir_util.copy_tree("gc_lang/" + sLang + "/xpi/", spBuild)
    dir_util.copy_tree("grammalecte-js", spBuild + "/grammalecte")

    # templates (filled in place)
    sHTML, dProperties = createOptionsForXPI(dVars)
    dVars['optionsHTML'] = sHTML
    spfAboutPanel = spBuild + "/data/about_panel.html"
    copyAndFileTemplate(spfAboutPanel, spfAboutPanel, dVars)
    for sLocale in dProperties.keys():
        spfLocale = spBuild + "/locale/" + sLocale + ".properties"
        if not os.path.exists(spfLocale):
            echo("Locale file not found: " + spfLocale)
            continue
        copyAndFileTemplate(spfLocale, spfLocale, dProperties)

    # packaging
    with cd(spBuild):
        os.system("jpm xpi")
        if bLaunchFx:
            os.system("jpm run -b nightly")
Exemplo n.º 8
0
def displayStats(lParagraphRules, lSentenceRules):
    """Display the number of actions of each type and the number of rules.

    One line is displayed for the paragraph rules (§) and one for the
    sentence rules (s), under a header line.
    """
    tHeaders = ("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX")
    echo("  {:>18} {:>18} {:>18} {:>18}".format(*tHeaders))
    sRowFormat = " {:>10} actions {:>10} actions {:>10} actions  in {:>8} rules"
    for sPrefix, lRules in (("§", lParagraphRules), ("s", lSentenceRules)):
        dCount, nRule = _calcRulesStats(lRules)
        echo((sPrefix + sRowFormat).format(dCount['='], dCount['~'],
                                           dCount['-'], nRule))
Exemplo n.º 9
0
def main ():
    """Read texts until an empty one is returned and check each of them.

    With --parse, the morphologies of each word are displayed instead of
    checking the text. The program exits with status 1 if at least one text
    had errors.
    """
    xParser = argparse.ArgumentParser()
    # all on/off switches share the same declaration
    for sShort, sLong, sHelp in (
            ("-d", "--debug", "display text transformation and disambiguation"),
            ("-p", "--parse", "parse and display sentence structure"),
            ("-v", "--validate", "validate text only"),
            ("-a", "--autocorrect", "try to correct automatically")):
        xParser.add_argument(sShort, sLong, help=sHelp, action="store_true")
    xParser.add_argument("-i", "--ignore-rule", help="ignore this rule (can be used more than once)", action="append", default=[])
    xParser.add_argument("-tf", "--textformatter", help="auto-format text", action="store_true")
    xArgs = xParser.parse_args()

    gce.load()
    gce.setOptions({"html": True})
    oDict = gce.getDictionary()
    oTokenizer = tzr.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)

    # the text formatter is only created when requested
    if xArgs.textformatter:
        oTF = tf.TextFormatter()

    sPrompt = "> "
    bErrorFound = False
    sText = _getText(sPrompt)

    while sText:
        if xArgs.parse:
            # split() never yields empty strings
            for sWord in sText.split():
                echo("* {}".format(sWord))
                for sMorph in oDict.getMorph(sWord):
                    echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
        else:
            if xArgs.textformatter:
                sText = oTF.formatText(sText)
                sys.stdout.write(sText)

            xResult = parser(sText, oTokenizer, oDict, bDebug=xArgs.debug, aIgnoredRules=xArgs.ignore_rule)

            if xResult:
                bErrorFound = True
                # in validation mode, errors are only counted
                if not xArgs.validate:
                    showResult(sText, xResult, xArgs.autocorrect)
            elif not xArgs.validate:
                echo("No error found")

        sText = _getText(sPrompt)

    if bErrorFound:
        sys.exit(1)
Exemplo n.º 10
0
def prepareOptions(lOptionLines):
    """Return a dictionary with data about options.

    Recognized lines:
        OPTGROUP/<name>: <options separated by spaces and commas>
        OPTSOFTWARE: <software names>
        OPT/<name>: <one value per software, in the order of OPTSOFTWARE>
        OPTLANG/<lang>: <title of the options>
        OPTLABEL/<name>: <label>[|<description>]

    Any other line, and any recognized line which is malformed or misplaced
    (OPT/ with more values than declared softwares, OPTLABEL/ before any
    OPTLANG/), is reported as a wrong option line and ignored.

    The returned dictionary contains "lStructOpt", "dOptLabel" and one entry
    "dOpt<software>" per software declared with OPTSOFTWARE.
    """
    sLang = ""
    lStructOpt = []
    lOpt = []
    dOptLabel = {}
    for sLine in lOptionLines:
        sLine = sLine.strip()
        bValid = False
        if sLine.startswith("OPTGROUP/"):
            m = re.match("OPTGROUP/([a-z0-9]+):(.+)$", sLine)
            if m:
                lStructOpt.append(
                    (m.group(1), list(map(str.split, m.group(2).split(",")))))
                bValid = True
        elif sLine.startswith("OPTSOFTWARE:"):
            # don’t use tuples (s, {}), because unknown to JS
            lOpt = [[s, {}] for s in sLine[12:].strip().split()]
            bValid = True
        elif sLine.startswith("OPT/"):
            m = re.match("OPT/([a-z0-9]+):(.+)$", sLine)
            lValues = m.group(2).split() if m else []
            # values beyond the declared softwares would have no destination
            if m and len(lValues) <= len(lOpt):
                for i, sOpt in enumerate(lValues):
                    # NOTE(review): eval() executes whatever is written in the
                    # option line; only safe as long as these lines come from
                    # trusted project files.
                    lOpt[i][1][m.group(1)] = eval(sOpt)
                bValid = True
        elif sLine.startswith("OPTLANG/"):
            m = re.match("OPTLANG/([a-z][a-z](?:_[A-Z][A-Z]|)):(.+)$", sLine)
            if m:
                sLang = m.group(1)[:2]
                dOptLabel[sLang] = {"__optiontitle__": m.group(2).strip()}
                bValid = True
        elif sLine.startswith("OPTLABEL/"):
            m = re.match("OPTLABEL/([a-z0-9]+):(.+)$", sLine)
            # a label can only be stored once a language has been declared
            if m and sLang in dOptLabel:
                sLabel = m.group(2)
                if "|" in sLabel:
                    dOptLabel[sLang][m.group(1)] = list(
                        map(str.strip, sLabel.split("|")))
                else:
                    dOptLabel[sLang][m.group(1)] = [sLabel.strip(), ""]
                bValid = True
        if not bValid:
            echo("# Error. Wrong option line in:\n  ")
            echo(sLine)
    echo("  options defined for: " + ", ".join([t[0] for t in lOpt]))
    dOptions = {"lStructOpt": lStructOpt, "dOptLabel": dOptLabel}
    dOptions.update({"dOpt" + k: v for k, v in lOpt})
    return dOptions
Exemplo n.º 11
0
def main():
    """Command line interface of the grammar checker.

    With --file or --file_to_file, the file is checked paragraph by paragraph
    (or by groups of lines with --concat_lines) and the results are displayed
    or written in a result file (*.res.txt), as text or as JSON.
    Without file, a pseudo-console is launched: each text entered is checked,
    and commands (/h, /q, /+, /-, /++, /--, /d, /tf, /l, /lr, ?word...) are
    available.
    """
    import os  # local import: only used to build the name of the result file

    xParser = argparse.ArgumentParser()
    xParser.add_argument(
        "-f",
        "--file",
        help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]",
        type=str)
    xParser.add_argument(
        "-ff",
        "--file_to_file",
        help=
        "parse file (UTF-8 required!) and create a result file (*.res.txt)",
        type=str)
    xParser.add_argument("-owe",
                         "--only_when_errors",
                         help="display results only when there are errors",
                         action="store_true")
    xParser.add_argument(
        "-j",
        "--json",
        help=
        "generate list of errors in JSON (only with option --file or --file_to_file)",
        action="store_true")
    xParser.add_argument(
        "-cl",
        "--concat_lines",
        help=
        "concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)",
        action="store_true")
    xParser.add_argument(
        "-tf",
        "--textformatter",
        help=
        "auto-format text according to typographical rules (unavailable with option --concat_lines)",
        action="store_true")
    xParser.add_argument(
        "-tfo",
        "--textformatteronly",
        help=
        "auto-format text and disable grammar checking (only with option --file or --file_to_file)",
        action="store_true")
    xParser.add_argument(
        "-ctx",
        "--context",
        help="return errors with context (only with option --json)",
        action="store_true")
    xParser.add_argument(
        "-w",
        "--width",
        help="width in characters (40 < width < 200; default: 100)",
        type=int,
        choices=range(40, 201, 10),
        default=100)
    xParser.add_argument("-lo",
                         "--list_options",
                         help="list options",
                         action="store_true")
    xParser.add_argument("-lr",
                         "--list_rules",
                         nargs="?",
                         help="list rules [regex pattern as filter]",
                         const="*")
    xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
    xParser.add_argument("-off",
                         "--opt_off",
                         nargs="+",
                         help="deactivate options")
    xParser.add_argument("-roff",
                         "--rule_off",
                         nargs="+",
                         help="deactivate rules")
    xParser.add_argument("-d",
                         "--debug",
                         help="debugging mode (only in interactive mode)",
                         action="store_true")
    xArgs = xParser.parse_args()

    gce.load()
    if not xArgs.json:
        echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    # The text formatter is created on demand: at startup if requested on the
    # command line, later if switched on from the console with /tf.
    oTF = None
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()

    if xArgs.list_options or xArgs.list_rules:
        if xArgs.list_options:
            gce.displayOptions("fr")
        if xArgs.list_rules:
            # "*" is the value stored when -lr is given without pattern
            gce.displayRules(None if xArgs.list_rules ==
                             "*" else xArgs.list_rules)
        sys.exit()

    # context is only meaningful in JSON output
    if not xArgs.json:
        xArgs.context = False

    gce.setOptions({"html": True, "latex": True})
    if xArgs.opt_on:
        gce.setOptions(
            {opt: True
             for opt in xArgs.opt_on if opt in gce.getOptions()})
    if xArgs.opt_off:
        gce.setOptions(
            {opt: False
             for opt in xArgs.opt_off if opt in gce.getOptions()})

    if xArgs.rule_off:
        for sRule in xArgs.rule_off:
            gce.ignoreRule(sRule)

    sFile = xArgs.file or xArgs.file_to_file
    if sFile:
        # file processing
        hDst = None
        if xArgs.file_to_file or sys.platform == "win32":
            # splitext() leaves the name untouched when it has no extension
            # (slicing at rfind(".") would drop its last character)
            hDst = open(os.path.splitext(sFile)[0] + ".res.txt",
                        "w",
                        encoding="utf-8")
        try:
            bComma = False
            if xArgs.json:
                output(
                    '{ "grammalecte": "' + gce.version + '", "lang": "' +
                    gce.lang + '", "data" : [\n', hDst)
            if not xArgs.concat_lines:
                # no concatenation of lines
                for i, sText in enumerate(readfile(sFile), 1):
                    if xArgs.textformatter or xArgs.textformatteronly:
                        sText = oTF.formatText(sText)
                    if xArgs.textformatteronly:
                        output(sText, hDst)
                    else:
                        if xArgs.json:
                            sText = generateJSON(
                                i,
                                sText,
                                oTokenizer,
                                oDict,
                                bContext=xArgs.context,
                                bDebug=False,
                                bEmptyIfNoErrors=xArgs.only_when_errors,
                                bReturnText=xArgs.textformatter)
                        else:
                            sText = generateText(
                                sText,
                                oTokenizer,
                                oDict,
                                bDebug=False,
                                bEmptyIfNoErrors=xArgs.only_when_errors,
                                nWidth=xArgs.width)
                        if sText:
                            # JSON items are separated by commas
                            if xArgs.json and bComma:
                                output(",\n", hDst)
                            output(sText, hDst)
                            bComma = True
                    if hDst:
                        # progress indicator when results go to a file
                        echo("§ %d\r" % i, end="", flush=True)
            else:
                # concatenation of lines not separated by an empty line
                for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
                    sText, lLineSet = txt.createParagraphWithLines(lLine)
                    if xArgs.json:
                        sText = generateJSON(
                            i,
                            sText,
                            oTokenizer,
                            oDict,
                            bContext=xArgs.context,
                            bDebug=False,
                            bEmptyIfNoErrors=xArgs.only_when_errors,
                            lLineSet=lLineSet)
                    else:
                        sText = generateText(
                            sText,
                            oTokenizer,
                            oDict,
                            bDebug=False,
                            bEmptyIfNoErrors=xArgs.only_when_errors,
                            nWidth=xArgs.width)
                    if sText:
                        if xArgs.json and bComma:
                            output(",\n", hDst)
                        output(sText, hDst)
                        bComma = True
                    if hDst:
                        echo("§ %d\r" % i, end="", flush=True)
            if xArgs.json:
                output("\n]}\n", hDst)
        finally:
            # the result file must be closed (and flushed) even on error
            if hDst:
                hDst.close()
    else:
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        while True:
            if sText.startswith("?"):
                # morphologies of the given words
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(
                                sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("/+ "):
                gce.setOptions({
                    opt: True
                    for opt in sText[3:].strip().split()
                    if opt in gce.getOptions()
                })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({
                    opt: False
                    for opt in sText[3:].strip().split()
                    if opt in gce.getOptions()
                })
                echo("done")
            elif sText.startswith("/-- "):
                for sRule in sText[3:].strip().split():
                    gce.ignoreRule(sRule)
                echo("done")
            elif sText.startswith("/++ "):
                for sRule in sText[3:].strip().split():
                    gce.reactivateRule(sRule)
                echo("done")
            elif sText == "/debug" or sText == "/d":
                xArgs.debug = not (xArgs.debug)
                echo("debug mode on" if xArgs.debug else "debug mode off")
            elif sText == "/textformatter" or sText == "/tf":
                xArgs.textformatter = not (xArgs.textformatter)
                if xArgs.textformatter and oTF is None:
                    # not requested on the command line: create it now
                    oTF = tf.TextFormatter()
                # report the state of the text formatter (not of debug mode)
                echo("textformatter on"
                     if xArgs.textformatter else "textformatter off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/l":
                gce.displayOptions("fr")
            elif sText.startswith("/lr"):
                sText = sText.strip()
                sFilter = sText[sText.find(" "):].strip(
                ) if sText != "/lr" and sText != "/rules" else None
                gce.displayRules(sFilter)
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                # each paragraph is formatted and checked on its own
                for sParagraph in txt.getParagraph(sText):
                    if xArgs.textformatter:
                        sParagraph = oTF.formatText(sParagraph)
                    sRes = generateText(
                        sParagraph,
                        oTokenizer,
                        oDict,
                        bDebug=xArgs.debug,
                        bEmptyIfNoErrors=xArgs.only_when_errors,
                        nWidth=xArgs.width)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)
Exemplo n.º 12
0
def output(sText, hDst=None):
    """Write <sText> in <hDst> if given, else display it without newline."""
    if hDst:
        hDst.write(sText)
        return
    echo(sText, end="")
Exemplo n.º 13
0
def output (sText, hDst=None):
    """Send <sText> to the file object <hDst>, or to the display if there is none.

    Nothing is appended to the text when it is displayed.
    """
    if hDst:
        hDst.write(sText)
    else:
        echo(sText, end="")
Exemplo n.º 14
0
def createAction(sIdAction, sAction, nGroup):
    """Parse the text of an action and return the action as a list.

    Arguments:
        sIdAction: identifier of the action (used in messages and to name
                   the functions appended to FUNCTIONS)
        sAction:   [condition] <type>[group]>> <action> [# message [| URL]]
                   where <type> is one of - ~ = >
        nGroup:    number of capturing groups in the regex of the rule

    Returns, according to the type of the action:
        "-" error:             [condition, "-", suggestion, iGroup, message, URL]
        "~" text preprocessor: [condition, "~", replacement, iGroup]
        "=" disambiguator:     [condition, "=", name of the function]
        ">" no action:         [condition, ">", ""]
    where condition is the name of the condition function, or None.
    Returns None if no action or an unknown action is found.

    Inconsistencies (group numbers, empty parts, etc.) are only reported.
    """
    global FUNCTIONS

    m = re.search(r"([-~=>])(\d*|)>>", sAction)
    if not m:
        echo("# No action at line " + sIdAction)
        return None

    #### CONDITION
    sCondition = sAction[:m.start()].strip()
    if sCondition:
        sCondition = prepareFunction(sCondition)
        FUNCTIONS.append(("c" + sIdAction, sCondition))
        for x in re.finditer(r"[.](?:group|start|end)[(](\d+)[)]", sCondition):
            if int(x.group(1)) > nGroup:
                print("# Error in groups in condition at line " + sIdAction +
                      " (" + str(nGroup) + " groups only)")
        if ".match" in sCondition:
            echo(
                "# Error. JS compatibility. Don't use .match() in condition, use .search()"
            )
        # the condition is now referred to by the name of its function
        sCondition = "c" + sIdAction
    else:
        sCondition = None

    #### iGroup / positioning
    iGroup = int(m.group(2)) if m.group(2) else 0
    if iGroup > nGroup:
        echo("# Selected group > group number in regex at line " + sIdAction)

    #### ACTION
    sAction = sAction[m.end():].strip()
    cAction = m.group(1)
    if cAction == "-":
        ## error
        sURL = ""
        iMsg = sAction.find(" # ")
        if iMsg == -1:
            # No message separator: keep the action as it is (slicing with -1
            # would drop its last character and make up a message). The empty
            # message is reported below.
            sMsg = ""
        else:
            sMsg = sAction[iMsg + 3:].strip()
            sAction = sAction[:iMsg].strip()
        mURL = re.search("[|] *(https?://.*)", sMsg)
        if mURL:
            sURL = mURL.group(1).strip()
            sMsg = sMsg[:mURL.start(0)].strip()
        if sMsg[0:1] == "=":
            # the message is code
            sMsg = prepareFunction(sMsg[1:])
            FUNCTIONS.append(("m" + sIdAction, sMsg))
            for x in re.finditer(r"group[(](\d+)[)]", sMsg):
                if int(x.group(1)) > nGroup:
                    print("# error in groups in message at line " + sIdAction +
                          " (" + str(nGroup) + " groups only)")
            sMsg = "=m" + sIdAction
        else:
            for x in re.finditer(r"\\(\d+)", sMsg):
                if int(x.group(1)) > nGroup:
                    print("# error in groups in message at line " + sIdAction +
                          " (" + str(nGroup) + " groups only)")
            if re.search(r"[.]\w+[(]", sMsg):
                print(
                    "# error in message at line " + sIdAction +
                    ":  This message looks like code. Line should begin with ="
                )

    if sAction[0:1] == "=" or cAction == "=":
        # the action is code
        if "define" in sAction and not re.search(
                r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# error in action at line " + sIdAction +
                  ": second argument for define must be a list of strings")
        sAction = prepareFunction(sAction)
        sAction = sAction.replace("m.group(i[4])",
                                  "m.group(" + str(iGroup) + ")")
        for x in re.finditer(r"group[(](\d+)[)]", sAction):
            if int(x.group(1)) > nGroup:
                print("# error in groups in replacement at line " + sIdAction +
                      " (" + str(nGroup) + " groups only)")
    else:
        for x in re.finditer(r"\\(\d+)", sAction):
            if int(x.group(1)) > nGroup:
                print("# error in groups in replacement at line " + sIdAction +
                      " (" + str(nGroup) + " groups only)")
        if re.search(r"[.]\w+[(]", sAction):
            print("# error in action at line " + sIdAction +
                  ":  This action looks like code. Line should begin with =")

    if cAction == "-":
        ## error detected
        if not sAction:
            print("# error in action at line " + sIdAction +
                  ":  This action is empty.")
        if sAction[0:1] == "=":
            FUNCTIONS.append(("s" + sIdAction, sAction[1:]))
            sAction = "=s" + sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# error in action at line " + sIdAction +
                  ":  the message is empty.")
        return [sCondition, cAction, sAction, iGroup, sMsg, sURL]
    elif cAction == "~":
        ## text preprocessor
        if not sAction:
            print("# error in action at line " + sIdAction +
                  ":  This action is empty.")
        if sAction[0:1] == "=":
            FUNCTIONS.append(("p" + sIdAction, sAction[1:]))
            sAction = "=p" + sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sCondition, cAction, sAction, iGroup]
    elif cAction == "=":
        ## disambiguator
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if not sAction:
            print("# error in action at line " + sIdAction +
                  ":  This action is empty.")
        FUNCTIONS.append(("d" + sIdAction, sAction))
        sAction = "d" + sIdAction
        return [sCondition, cAction, sAction]
    elif cAction == ">":
        ## no action, break loop if condition is False
        return [sCondition, cAction, ""]
    else:
        echo("# Unknown action at line " + sIdAction)
        return None
Exemplo n.º 15
0
def createRule(s, nIdLine, sLang, bParagraph):
    """Parse the text of a rule and return the rule as a list.

    Arguments:
        s:          text of the rule:
                    __<options>__ <regex> <<- <action> [<<- <action> ...]
        nIdLine:    number of the line where the rule is defined
        sLang:      language code (used for uppercasing the regex)
        bParagraph: True for a paragraph rule, False for a sentence rule

    Returns
        [option name or False, regex, bCaseInsensitive, line identifier,
         rule identifier, list of actions, groups positioning codes or None]
    or None if the rule is invalid (no action, wrong regex, unknown case
    mode, etc.); the error is reported.
    """
    global JSREGEXES

    #### OPTIONS
    sLineId = str(nIdLine) + ("p" if bParagraph else "s")
    sRuleId = sLineId
    sOption = False  # False or [a-z0-9]+ name
    tGroups = None  # code for groups positioning (only useful for JavaScript)
    cCaseMode = 'i'  # i: case insensitive,  s: case sensitive,  u: uppercasing allowed
    cWordLimitLeft = '['  # [: word limit, <: no specific limit
    cWordLimitRight = ']'  # ]: word limit, >: no specific limit
    m = re.match(r"^__([\[<]\w[\]>])(/[a-zA-Z0-9]+|)(\(\w+\)|)__ *", s)
    if m:
        cWordLimitLeft = m.group(1)[0]
        cCaseMode = m.group(1)[1]
        cWordLimitRight = m.group(1)[2]
        sOption = m.group(2)[1:] if m.group(2) else False
        if m.group(3):
            sRuleId = m.group(3)[1:-1]
        s = s[m.end(0):]
    else:
        echo("Warning. No option defined at line: " + sLineId)

    #### REGEX TRIGGER
    i = s.find(" <<-")
    if i == -1:
        print("# Error: no condition at line " + sLineId)
        return None
    sRegex = s[:i].strip()
    s = s[i + 4:]

    # JS groups positioning codes
    m = re.search(r"@@\S+", sRegex)
    if m:
        tGroups = groupsPositioningCodeToList(sRegex[m.start() + 2:])
        sRegex = sRegex[:m.start()].strip()
    # JS regex
    m = re.search("<js>.+</js>i?", sRegex)
    if m:
        JSREGEXES[sLineId] = m.group(0)
        sRegex = sRegex[:m.start()].strip()
    if "<js>" in sRegex or "</js>" in sRegex:
        print("# Error: JavaScript regex not delimited at line " + sLineId)
        return None

    # quotes ?
    if sRegex.startswith('"') and sRegex.endswith('"'):
        sRegex = sRegex[1:-1]

    ## definitions
    for sDef, sRepl in DEF.items():
        sRegex = sRegex.replace(sDef, sRepl)

    ## count number of groups (must be done before modifying the regex)
    nGroup = countGroupInRegex(sRegex)
    if nGroup > 0:
        if not tGroups:
            print(
                "# warning: groups positioning code for JavaScript should be defined at line "
                + sLineId)
        else:
            if nGroup != len(tGroups):
                print("# error: groups positioning code irrelevant at line " +
                      sLineId)

    ## word limit
    if cWordLimitLeft == '[' and not sRegex.startswith(("^", '’', "'", ",")):
        sRegex = WORDLIMITLEFT + sRegex
    if cWordLimitRight == ']' and not sRegex.endswith(("$", '’', "'", ",")):
        sRegex = sRegex + WORDLIMITRIGHT

    ## casing mode
    if cCaseMode == "i":
        bCaseInsensitive = True
        if not sRegex.startswith("(?i)"):
            sRegex = "(?i)" + sRegex
    elif cCaseMode == "s":
        bCaseInsensitive = False
        sRegex = sRegex.replace("(?i)", "")
    elif cCaseMode == "u":
        bCaseInsensitive = False
        sRegex = sRegex.replace("(?i)", "")
        sRegex = uppercase(sRegex, sLang)
    else:
        # The rule cannot be built without a known case mode: give up here,
        # like for the other errors, instead of failing later on an
        # undefined bCaseInsensitive.
        print("# Unknown case mode [" + cCaseMode + "] at line " + sLineId)
        return None

    ## check regex
    try:
        re.compile(sRegex)
    except Exception:
        # Exception (not a bare except) so that Ctrl-C is not reported as a
        # regex error
        print("# Regex error at line ", nIdLine)
        echo(sRegex)
        traceback.print_exc()
        return None
    ## groups in non grouping parenthesis (one warning per occurrence)
    for _ in re.finditer(r"\(\?:[^)]*\([\[\w -]", sRegex):
        print(
            "# Warning: groups inside non grouping parenthesis in regex at line "
            + sLineId)

    #### PARSE ACTIONS
    lActions = []
    # actions are numbered from 1, invalid ones included
    for nAction, sAction in enumerate(s.split(" <<- "), 1):
        t = createAction(sLineId + "_" + str(nAction), sAction, nGroup)
        if t:
            lActions.append(t)
    if not lActions:
        return None

    return [
        sOption, sRegex, bCaseInsensitive, sLineId, sRuleId, lActions, tGroups
    ]
Exemplo n.º 16
0
def main():
    """Build the language projects named on the command line.

    For each requested language that has a folder under gc_lang/, the
    following steps run in order:
      * with -js, the XPI build folder for the language is created;
      * with -d, the FSA dictionary is generated (and, with -js, also
        written as a JavaScript object);
      * with -b, the language's build_data module is imported and run;
      * create() is called, and its return value is kept as the version;
      * with -t and/or -p, the module tests.<lang>_test is imported, then
        its unit tests are run and/or its perf() is called with the version.
    Languages without a folder are reported and skipped.
    """
    print("Python: " + sys.version)

    xParser = argparse.ArgumentParser()
    xParser.add_argument(
        "lang",
        type=str,
        nargs='+',
        help="lang project to generate (name of folder in /lang)")
    # Every other option is an on/off switch. They are registered in this
    # order because it is the order in which --help lists them.
    lSwitches = [
        ("-b", "--build_data", "launch build_data.py"),
        ("-d", "--dict", "generate FSA dictionary"),
        ("-t", "--tests", "run unit tests"),
        ("-p", "--perf", "run performance tests"),
        ("-js", "--javascript", "JavaScript build for Firefox"),
        ("-fx", "--firefox", "Launch Firefox Nightly for XPI testing"),
        ("-i", "--install",
         "install the extension in Writer (path of unopkg must be set in config.ini)"),
    ]
    for sShort, sLong, sHelp in lSwitches:
        xParser.add_argument(sShort, sLong, help=sHelp, action="store_true")
    xArgs = xParser.parse_args()

    dir_util.mkpath("_build")

    for sLang in xArgs.lang:
        if not os.path.isdir("gc_lang/" + sLang):
            print("Folder not found: gc_lang/" + sLang)
            continue

        xConfig = getConfig(sLang)
        dVars = xConfig._sections['args']

        if xArgs.javascript:
            dir_util.mkpath("_build/xpi/" + sLang + "/data")

        # dictionary
        if xArgs.dict:
            oDAWG = fsa.DAWG(dVars['lexicon_src'], dVars['lang_name'],
                             dVars['stemming_method'])
            spBinaryDic = "grammalecte/_dictionaries/" + dVars['binary_dic']
            oDAWG.writeInfo(spBinaryDic + ".info.txt")
            oDAWG.createBinary(spBinaryDic, int(dVars['fsa_method']))
            if xArgs.javascript:
                oDic = IBDAWG(dVars['binary_dic'])
                oDic.writeAsJSObject("grammalecte-js/_dictionaries/" +
                                     dVars['js_binary_dic'])

        # language data
        if xArgs.build_data:
            try:
                xBuildModule = importlib.import_module("gc_lang." + sLang +
                                                       ".build_data")
            except ImportError:
                print(
                    "# Error. Couldn’t import file build_data.py in folder gc_lang/"
                    + sLang)
            else:
                xBuildModule.main('gc_lang/' + sLang, xArgs.javascript)

        # make
        sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript,
                          xArgs.firefox)

        # tests
        if not (xArgs.tests or xArgs.perf):
            continue
        print("> Running tests")
        try:
            xTestModule = importlib.import_module("tests." + sLang + "_test")
            echo(xTestModule.__file__)
        except ImportError:
            print("# Error. Couldn't import file {}_test.py in folder tests".
                  format(sLang))
            continue
        if xArgs.tests:
            xTestSuite = unittest.TestLoader().loadTestsFromModule(xTestModule)
            unittest.TextTestRunner().run(xTestSuite)
        if xArgs.perf:
            xTestModule.perf(sVersion)
Exemplo n.º 17
0
def main():
    """Run the command-line grammar checker.

    The text to check comes from one of three places:
      * -f FILE: each line of FILE is parsed and the result is echoed;
      * -ff FILE: each line of FILE is parsed and the result is written to
        a sibling file named <FILE without extension>.res.txt;
      * no file: an interactive prompt, which ends on an empty input.
    On Windows, -f is handled as -ff.

    In the interactive prompt, a line starting with "?" lists the
    morphologies of the words that follow, and "rl" is reserved for a
    reload command that is not implemented yet.
    """
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-d", "--debug", help="display text transformation and disambiguation", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text", action="store_true")
    xArgs = xParser.parse_args()

    # On Windows, results of -f are written to a file, exactly like -ff.
    if sys.platform == "win32" and xArgs.file:
        xArgs.file_to_file = xArgs.file
        xArgs.file = None

    gce.load()
    gce.setOptions({"html": True})
    echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tzr.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter:
        # oTF only exists when -tf is given; every use below is guarded by
        # the same flag.
        oTF = tf.TextFormatter()

    if xArgs.file:
        if os.path.isfile(xArgs.file):
            with open(xArgs.file, "r", encoding="utf-8") as hSrc:
                for sText in hSrc:
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    echo(parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug))
        else:
            print("# Error: file not found.")
    elif xArgs.file_to_file:
        if os.path.isfile(xArgs.file_to_file):
            # splitext() only removes a real extension of the last path
            # component. Slicing at rfind(".") dropped the last character of
            # extension-less names (rfind returns -1) and cut inside dotted
            # directory names.
            spResult = os.path.splitext(xArgs.file_to_file)[0] + ".res.txt"
            with open(xArgs.file_to_file, "r", encoding="utf-8") as hSrc, \
                 open(spResult, "w", encoding="utf-8") as hDst:
                for i, sText in enumerate(hSrc, 1):
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    hDst.write(parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug))
                    # progress indicator, rewritten in place on the same line
                    print("§ %d\r" % i, end="", flush=True)
        else:
            print("# Error: file not found.")
    else:
        sInputText = "\n~==========~ Écrivez votre texte [Entrée pour quitter] ~==========~\n"
        sText = _getText(sInputText)
        while sText:
            if sText.startswith("?"):
                # morphology lookup for each word after the "?"
                for sWord in sText[1:].split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText == "rl":
                # reload (todo)
                pass
            else:
                if xArgs.textformatter:
                    sText = oTF.formatText(sText)
                res = parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug, bEmptyIfNoErrors=True)
                echo("\n"+res  if res  else "\nNo error found.")
            sText = _getText(sInputText)
Exemplo n.º 18
0
def make(lRules, sLang, bJavaScript):
    """Compile the lines of a rules file into the data used by the checker.

    Args:
        lRules: iterable of the raw lines of the rules file.
        sLang: language code, used to build the output paths under
            tests/ and gc_lang/.
        bJavaScript: not read by this function (the JavaScript files are
            always written); kept so that existing callers keep working.

    Returns:
        A dict with the keys "paragraph_rules", "sentence_rules",
        "paragraph_rules_JS" and "sentence_rules_JS", updated with
        whatever prepareOptions() returned for the option lines.

    Side effects:
        Fills the module-level DEF mapping from "DEF:" lines and writes
        tests/<lang>/gc_test.txt,
        gc_lang/<lang>/modules-js/tests_data.json,
        gc_lang/<lang>/modules/gc_tmp_eval.py and
        gc_lang/<lang>/modules-js/gc_tmp_eval.js.
    """

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    echo("  parsing rules...")
    global DEF
    lRuleLine = []  # [line number, rule text] pairs; the text grows with continuation lines
    lTest = []
    lOpt = []
    for i, sLine in enumerate(lRules, 1):
        if sLine.startswith('#END'):
            # everything after this marker is ignored
            break
        elif sLine.startswith("#"):
            pass
        elif sLine.startswith("DEF:"):
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$",
                         sLine.strip())
            if m:
                DEF["{" + m.group(1) + "}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                echo(sLine.strip())
        elif sLine.startswith("TEST:"):
            # each test is prefixed with the line number it comes from
            lTest.append("{:<8}".format(i) + "  " + sLine[5:].lstrip())
        elif sLine.startswith("TODO:"):
            pass
        elif sLine.startswith(
            ("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTLABEL/")):
            lOpt.append(sLine)
        elif re.match("[ \u00a0\t]*$", sLine):
            # blank line: only spaces, no-break spaces or tabs
            pass
        elif sLine.startswith(("    ", "\t")):
            # indented line: continuation of the previous rule
            if lRuleLine:
                lRuleLine[-1][1] += " " + sLine.strip()
            else:
                # there is no rule to extend yet; report it instead of
                # failing with an IndexError
                print("# Error: continuation line without preceding rule at line "
                      + str(i))
        else:
            lRuleLine.append([i, sLine.strip()])

    # generating options files
    echo("  parsing options...")
    dOptions = prepareOptions(lOpt)

    # generating test files
    echo("  generating test files...")
    with open("tests/"+sLang+"/gc_test.txt", "w", encoding="utf-8") as hDstPy, \
         open("gc_lang/"+sLang+"/modules-js/tests_data.json", "w", encoding="utf-8") as hDstJS:
        hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
        hDstPy.writelines(lTest)
        hDstJS.write('{ "aData": ' + json.dumps(lTest, ensure_ascii=False) +
                     " }\n")

    # processing
    echo("  preparing rules...")
    bParagraph = True  # rules are paragraph rules until the "[++]" marker is met
    lParagraphRules = []
    lSentenceRules = []
    lParagraphRulesJS = []
    lSentenceRulesJS = []

    for nLine, sLine in lRuleLine:
        if not sLine:
            continue
        if sLine == "[++]":
            bParagraph = False
            continue
        aRule = createRule(sLine, nLine, sLang, bParagraph)
        if not aRule:
            continue
        if bParagraph:
            lParagraphRules.append(aRule)
            lParagraphRulesJS.append(pyRuleToJS(aRule))
        else:
            lSentenceRules.append(aRule)
            lSentenceRulesJS.append(pyRuleToJS(aRule))

    # creating file with all functions callable by rules
    echo("  creating callables...")
    # parameter list of a generated function, chosen by the first letter of its name
    dParamsByType = {
        "c": "s, sx, m, dDA, sCountry, bCondMemo",  # condition
        "m": "s, m",  # message
        "s": "s, m",  # suggestion
        "p": "s, m",  # preprocessor
        "d": "s, m, dDA",  # disambiguator
    }
    with open("gc_lang/"+sLang+"/modules/gc_tmp_eval.py", "w", encoding="utf-8") as hDstPy, \
         open("gc_lang/"+sLang+"/modules-js/gc_tmp_eval.js", "w", encoding="utf-8") as hDstJS:
        hDstPy.write("# generated code, do not edit\n")
        hDstJS.write("// generated code, do not edit\nconst oEvalFunc = {\n")
        for sFuncName, sReturn in FUNCTIONS:
            sParams = dParamsByType.get(sFuncName[0:1])
            if sParams is None:
                print("# Unknown function type in [" + sFuncName + "]")
                continue
            hDstPy.write("def {} ({}):\n".format(sFuncName, sParams))
            hDstPy.write("    return " + sReturn + "\n")
            hDstJS.write("    {}: function ({})".format(sFuncName, sParams) +
                         " {\n")
            hDstJS.write("        return " + py2js(sReturn) + ";\n")
            hDstJS.write("    },\n")
        hDstJS.write("}\n")

    displayStats(lParagraphRules, lSentenceRules)

    d = {
        "paragraph_rules":
        mergeRulesByOption(lParagraphRules),
        "sentence_rules":
        mergeRulesByOption(lSentenceRules),
        "paragraph_rules_JS":
        writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
        "sentence_rules_JS":
        writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS))
    }
    d.update(dOptions)

    return d
Exemplo n.º 19
0
def main():
    """Run the command-line grammar checker (file mode or pseudo-console).

    With -f or -ff, every text yielded by readfile() for the file is
    optionally auto-formatted, then either output as is (-tfo) or checked
    with generateText(). Results go through output(); a result file
    <FILE without extension>.res.txt is opened when -ff is used or when
    running on Windows. With -j the results are wrapped in a JSON document.

    Without a file, an interactive pseudo-console is started. It understands:
      ?word ...         list the morphologies of the given words
      /+ opt ...        enable the given grammar options
      /- opt ...        disable the given grammar options
      /debug, /d        toggle debug mode
      /help, /h         display the help text
      /lopt, /l         list the grammar options
      /quit, /q         leave
      /rl               reserved for reload (not implemented)
    Any other input is split into paragraphs which are checked one by one.
    """
    # Local import: only needed to build the result file name.
    import os

    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option 'file' or 'file_to_file')", action="store_true")
    xArgs = xParser.parse_args()

    gce.load()
    gce.setOptions({"html": True})
    echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()

    sFile = xArgs.file or xArgs.file_to_file
    if sFile:
        # file processing
        hDst = None
        if xArgs.file_to_file or sys.platform == "win32":
            # splitext() only removes a real extension of the last path
            # component. Slicing at rfind(".") dropped the last character of
            # extension-less names and cut inside dotted directory names.
            hDst = open(os.path.splitext(sFile)[0] + ".res.txt", "w", encoding="utf-8")
        try:
            bComma = False  # True once a first JSON item has been written
            if xArgs.json:
                output('{ "grammalecte": "'+gce.version+'", "lang": "'+gce.lang+'", "data" : [\n', hDst)
            for i, sText in enumerate(readfile(sFile), 1):
                if xArgs.textformatter or xArgs.textformatteronly:
                    sText = oTF.formatText(sText)
                if xArgs.textformatteronly:
                    output(sText, hDst)
                else:
                    sText = generateText(i, sText, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width)
                    if sText:
                        if xArgs.json and bComma:
                            output(",\n", hDst)
                        output(sText, hDst)
                        bComma = True
                if hDst:
                    # progress indicator, only when results go to a file
                    echo("§ %d\r" % i, end="", flush=True)
            if xArgs.json:
                output("\n]}\n", hDst)
        finally:
            # the result file was previously left open
            if hDst:
                hDst.close()
    else:
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        bDebug = False
        while True:
            if sText.startswith("?"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("/+"):
                gce.setOptions({ opt:True  for opt in sText[2:].strip().split()  if opt in gce.getOptions() })
            elif sText.startswith("/-"):
                gce.setOptions({ opt:False  for opt in sText[2:].strip().split()  if opt in gce.getOptions() })
            elif sText == "/debug" or sText == "/d":
                bDebug = not bDebug
                echo("debug mode on"  if bDebug  else "debug mode off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/l":
                echo("\n".join( [ k+":\t"+str(v)  for k, v  in sorted(gce.getOptions().items()) ] ))
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                for sParagraph in txt.getParagraph(sText):
                    # Work on the current paragraph. The whole input used to
                    # be formatted and checked again on every iteration.
                    if xArgs.textformatter:
                        sParagraph = oTF.formatText(sParagraph)
                    sRes = generateText(0, sParagraph, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width, bDebug=bDebug, bEmptyIfNoErrors=True)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)
Exemplo n.º 20
0
            aSpellErrs.append(dToken)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    return "  " + json.dumps(
        {
            "iParagraph": iParagraph,
            "lGrammarErrors": aGrammErrs,
            "lSpellingErrors": aSpellErrs
        },
        ensure_ascii=False)


if __name__ == '__main__':

    gce.load()
    echo("Grammalecte v{}".format(gce.version))
    dServerOptions = getServerOptions()
    dGCOptions = getConfigOptions("fr")
    if dGCOptions:
        gce.setOptions(dGCOptions)
    dServerGCOptions = gce.getOptions()
    echo("Grammar options:\n" + " | ".join(
        [k + ": " + str(v) for k, v in sorted(dServerGCOptions.items())]))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oTF = tf.TextFormatter()
    dUser = {}
    userGenerator = genUserId()

    app = Bottle()