def perf(sVersion):
    # Assumes Grammalecte's grammar engine is imported as `gce`, plus the
    # standard-library `time` module and a `timeblock` timing helper
    # (a sketch of which follows below).
    print("\nPerformance tests")
    gce.load()
    # Warm-up parse on a throwaway French sentence ("unimportant text, useful
    # for compiling the rules before measuring perfs"), so rule compilation
    # does not skew the timings.
    aErrs = gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")
    with open("./tests/fr/perf.txt", "r", encoding="utf-8") as hSrc, \
         open("./tests/fr/perf_memo.txt", "a", encoding="utf-8") as hDst:
        hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
        for sText in (s.strip() for s in hSrc if not s.startswith("#") and s.strip()):
            with timeblock(sText[:sText.find(".")], hDst):
                aErrs = gce.parse(sText)
        hDst.write("\n")
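# `timeblock` is used above but not defined in this section. A minimal sketch,
# assuming it is a context manager that measures the elapsed wall-clock time of
# its body and records it under a label (the real implementation may format
# its output differently):
from contextlib import contextmanager
import time

@contextmanager
def timeblock(sLabel, hDst):
    nStart = time.perf_counter()
    try:
        yield
    finally:
        nDelta = time.perf_counter() - nStart
        print("{} : {:.3f} s".format(sLabel, nDelta))
        if hDst:
            hDst.write(" {:<10.4f}".format(nDelta))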
def main():
    '''Read the file and run grammalecte on it'''
    # Load grammalecte.
    gce.load()
    dictionary = gce.getDictionary()
    tokenizer = tkz.Tokenizer("fr")
    # Read input from stdin or first arg.
    text_input = [line for line in fileinput.input()]
    text, lineset = txt.createParagraphWithLines(list(enumerate(text_input)))
    # Grammar errors
    gramm_err = gce.parse(text, "FR", bDebug=False, bContext=True)
    # Spelling errors
    spell_err = []
    for token in tokenizer.genTokens(text):
        if token['sType'] == "WORD" and not dictionary.isValidToken(token['sValue']):
            spell_err.append(token)
    # Get columns and lines.
    gramm_err, spell_err = txt.convertToXY(gramm_err, spell_err, lineset)
    # Output, one error per line: type|line|column|message.
    for i in gramm_err:
        print('grammaire|{}|{}|{}\n'.format(i['nStartY'] + 1, i['nStartX'] + 1, i['sMessage']))
    for i in spell_err:
        print('orthographe|{}|{}|{}\n'.format(i['nStartY'] + 1, i['nStartX'] + 1, 'Mot absent du dictionnaire'))
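# Hedged usage note: fileinput.input() reads the files named on the command
# line, or stdin when none are given, so the function above can be run as
# `python script.py document.txt` or fed through a pipe. A conventional entry
# point (assumed here, not shown in the original) would be:
if __name__ == '__main__':
    main()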
def parser(sText, oTokenizer, oDict, nWidth=100, bDebug=False, bEmptyIfNoErrors=False):
    aGrammErrs = gce.parse(sText, "FR", bDebug)
    aSpellErrs = []
    for tToken in oTokenizer.genTokens(sText):
        if tToken.type == "WORD" and not oDict.isValidToken(tToken.value):
            aSpellErrs.append(tToken)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    return txt.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)
def _getErrors(sText, oTokenizer, oDict, bContext=False, bDebug=False):
    "returns a tuple: (grammar errors, spelling errors)"
    aGrammErrs = gce.parse(sText, "FR", bDebug=bDebug, bContext=bContext)
    aSpellErrs = []
    for dToken in oTokenizer.genTokens(sText):
        if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']):
            aSpellErrs.append(dToken)
    return aGrammErrs, aSpellErrs
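# A minimal sketch of how _getErrors() might be driven, assuming the same
# setup as main() above; demo_check and the sample call are hypothetical.
def demo_check(sText):
    gce.load()
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict)
    print("{} grammar error(s), {} spelling error(s)".format(len(aGrammErrs), len(aSpellErrs)))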
def _getFoundErrors(self, sLine, sOption):
    if sOption:
        gce.setOption(sOption, True)
        aErrs = gce.parse(sLine)
        gce.setOption(sOption, False)
    else:
        aErrs = gce.parse(sLine)
    sRes = " " * len(sLine)
    sListErr = ""
    lAllSugg = []
    for dErr in aErrs:
        sRes = sRes[:dErr["nStart"]] + "~" * (dErr["nEnd"] - dErr["nStart"]) + sRes[dErr["nEnd"]:]
        sListErr += " * {sLineId} / {sRuleId} at {nStart}:{nEnd}\n".format(**dErr)
        lAllSugg.append("|".join(dErr["aSuggestions"]))
        self._aRuleTested.add(dErr["sLineId"])
    return sRes, sListErr, "|||".join(lAllSugg)
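# Hypothetical sketch of a test helper built on _getFoundErrors(): the "~"
# mask it returns marks each flagged span and can be compared against an
# expected mask. _checkLine and its arguments are illustrative, not part of
# the original.
def _checkLine(self, sLine, sExpectedMask, sOption=""):
    sFoundMask, sListErr, sSugg = self._getFoundErrors(sLine, sOption)
    if sFoundMask != sExpectedMask:
        print("Error mask mismatch:\n  " + sLine + "\n  " + sFoundMask + "\n" + sListErr)
        return False
    return True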
def generateText(iParagraph, sText, oTokenizer, oDict, bJSON, nWidth=100, bDebug=False, bEmptyIfNoErrors=False):
    aGrammErrs = gce.parse(sText, "FR", bDebug)
    aSpellErrs = []
    for dToken in oTokenizer.genTokens(sText):
        if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']):
            aSpellErrs.append(dToken)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    if not bJSON:
        return txt.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)
    return " " + json.dumps({
        "iParagraph": iParagraph,
        "lGrammarErrors": aGrammErrs,
        "lSpellingErrors": aSpellErrs
    }, ensure_ascii=False)
def doProofreading(self, nDocId, rText, rLocale, nStartOfSentencePos, nSuggestedSentenceEndPos, rProperties):
    xRes = ProofreadingResult()
    #xRes = uno.createUnoStruct("com.sun.star.linguistic2.ProofreadingResult")
    xRes.aDocumentIdentifier = nDocId
    xRes.aText = rText
    xRes.aLocale = rLocale
    xRes.nStartOfSentencePosition = nStartOfSentencePos
    xRes.nStartOfNextSentencePosition = nSuggestedSentenceEndPos
    xRes.aProperties = ()
    xRes.xProofreader = self
    xRes.aErrors = ()
    # PATCH FOR LO 4: process the whole paragraph in one call.
    # Fix for http://nabble.documentfoundation.org/Grammar-checker-Undocumented-change-in-the-API-for-LO-4-td4030639.html
    if nStartOfSentencePos != 0:
        return xRes
    xRes.nStartOfNextSentencePosition = len(rText)
    # END OF PATCH
    # WORKAROUND FOR AVOIDING REPEATED ACTIONS ON HEAVY PARAGRAPHS:
    # results for long paragraphs are cached, keyed by the text's hash.
    if xRes.nStartOfNextSentencePosition > 3000:
        nHashedVal = hash(rText)
        if nHashedVal in self.dResult:
            return self.dResult[nHashedVal]
    # WORKAROUND ->>>
    xRes.nBehindEndOfSentencePosition = xRes.nStartOfNextSentencePosition
    try:
        xRes.aErrors = gce.parse(rText, rLocale.Country)
        # ->>> WORKAROUND
        if xRes.nStartOfNextSentencePosition > 3000:
            self.dResult[nHashedVal] = xRes
            self.nRes += 1
            if self.nRes > self.nMaxRes:
                # Evict the oldest cached result (FIFO order via a deque).
                del self.dResult[self.lLastRes.popleft()]
                self.nRes = self.nMaxRes
            self.lLastRes.append(nHashedVal)
        # END OF WORKAROUND
    except Exception:
        if sys.version_info.major == 3:
            traceback.print_exc()
    return xRes
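# The workaround above amounts to a simple bounded FIFO cache: self.dResult
# maps hash(rText) to a finished ProofreadingResult, and self.lLastRes (a
# collections.deque) records insertion order so the oldest entry is evicted
# once self.nMaxRes is reached. A self-contained sketch of the same pattern,
# with illustrative names:
from collections import deque

class BoundedCache:
    def __init__(self, nMaxRes):
        self.dResult = {}
        self.lLastRes = deque()
        self.nMaxRes = nMaxRes

    def get(self, key):
        return self.dResult.get(key)

    def put(self, key, value):
        if key in self.dResult:
            self.dResult[key] = value
            return
        # FIFO eviction (not LRU): a lookup does not refresh an entry's age.
        if len(self.dResult) >= self.nMaxRes:
            del self.dResult[self.lLastRes.popleft()]
        self.dResult[key] = value
        self.lLastRes.append(key)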
def parseParagraph(iParagraph, sText, oTokenizer, oDict, dOptions, bDebug=False, bEmptyIfNoErrors=False):
    aGrammErrs = gce.parse(sText, "FR", bDebug, dOptions)
    aSpellErrs = []
    for dToken in oTokenizer.genTokens(sText):
        if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']):
            aSpellErrs.append(dToken)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    return " " + json.dumps({
        "iParagraph": iParagraph,
        "lGrammarErrors": aGrammErrs,
        "lSpellingErrors": aSpellErrs
    }, ensure_ascii=False)
def parser(sText, oTokenizer, oDict, bDebug=False, aIgnoredRules=()):
    aGrammErrs = gce.parse(sText, "FR", bDebug)
    aSpellErrs = []
    if bDebug:
        print(aGrammErrs)
    if aIgnoredRules:
        # Drop grammar errors whose rule id is in the ignore list.
        aGrammErrs = tuple(dGrammErr for dGrammErr in aGrammErrs
                           if dGrammErr['sRuleId'] not in aIgnoredRules)
    for tToken in oTokenizer.genTokens(sText):
        if tToken.type == "WORD" and not oDict.isValidToken(tToken.value):
            aSpellErrs.append(tToken)
    if not aGrammErrs and not aSpellErrs:
        return False
    return [aGrammErrs, aSpellErrs]
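# Hedged usage sketch for the variant above; the setup mirrors main() earlier,
# and the rule id in aIgnoredRules is purely illustrative. Note that this
# variant expects token objects with .type/.value attributes, so whether the
# tokenizer shown here matches is an assumption.
gce.load()
oDict = gce.getDictionary()
oTokenizer = tkz.Tokenizer("fr")
lRes = parser("Texte à vérifier.", oTokenizer, oDict, aIgnoredRules=("some_rule_id",))
if lRes:
    aGrammErrs, aSpellErrs = lRes
    print(len(aGrammErrs), "grammar error(s) /", len(aSpellErrs), "spelling error(s)")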