def start_local_lt_server(language):
    """Make sure the local LanguageTool HTTP server is reachable.

    If the server at ``ltserver_local`` already answers, nothing is done.
    Otherwise the command ``ltserver_local_cmd`` (plus any extra options
    from ``cmdline.lt_server_options``) is launched in
    ``cmdline.lt_directory`` and the server is polled up to 20 times at
    intervals of 0.5 seconds.  Progress dots are written to stderr.

    Failures are reported through tex2txt.fatal() (presumably terminating
    the program -- confirm in tex2txt): the command cannot be launched,
    the server rejects the check request with an HTTP error, or the server
    does not answer within the polling period.

    language: language code sent with the check request.
    """
    def check_server():
        """Return True if the local server answers a minimal request."""
        global ltserver_local_running
        if ltserver_local_running:
            return True
        # NB: we need at least one character to check (issue #57)
        data = {'text': ' ', 'language': language}
        data = urllib.parse.urlencode(data).encode(encoding='ascii')
        request = urllib.request.Request(ltserver_local, data=data)
        try:
            reply = urllib.request.urlopen(request)
            reply.close()
            ltserver_local_running = True
            return True
        except urllib.error.HTTPError as e:
            # as we have no real text, this is probably a wrong language code
            tex2txt.fatal('The server couldn\'t fulfill the request;'
                          + ' error code: ' + repr(e.code)
                          + '\n(probably an unknown language code)')
        except Exception:
            # Server not (yet) reachable.  Only ordinary errors are treated
            # as "not running"; KeyboardInterrupt and SystemExit propagate,
            # so the user can abort while we are polling.
            return False

    server_cmd = ltserver_local_cmd
    # the first character of the option string is skipped
    # NOTE(review): presumably a placeholder character -- confirm against
    # the command-line parser
    if cmdline.lt_server_options[1:]:
        server_cmd += ' ' + cmdline.lt_server_options[1:]

    if check_server():
        return

    # compare issue #12
    start_new_session = sys.platform != 'win32'
    try:
        subprocess.Popen(server_cmd.split(), cwd=cmdline.lt_directory,
                         start_new_session=start_new_session,
                         stdout=subprocess.DEVNULL,
                         stderr=subprocess.DEVNULL)
    except Exception:
        tex2txt.fatal('error running ' + repr(server_cmd)
                      + ' in directory ' + repr(cmdline.lt_directory))

    # wait for server to be available
    #
    sys.stderr.write('=== starting local LT server at "'
                     + cmdline.lt_directory
                     + '":\n=== ' + server_cmd + ' ')
    sys.stderr.flush()
    for _ in range(20):
        time.sleep(0.5)
        # flush unconditionally: do not depend on the value of write()
        sys.stderr.write('.')
        sys.stderr.flush()
        if check_server():
            sys.stderr.write('\n')
            sys.stderr.flush()
            return
    sys.stderr.write('\n')
    sys.stderr.flush()
    tex2txt.fatal('error starting server "' + server_cmd + '"')
def run_textgears(plain):
    """Check text with the TextGears service and convert its answer.

    The text is POSTed together with the key ``cmdline.textgears`` to
    ``textgears_server``.  Each entry of the 'errors' list in the JSON
    reply is converted into a dictionary with the keys 'message',
    'offset', 'length', 'context', 'replacements' and 'rule'.

    plain: plain text to be checked.

    Returns the list of converted entries.  Connection problems and an
    undecodable JSON reply are reported through tex2txt.fatal() and
    json_fatal(), respectively.
    """
    data = {
        'key': cmdline.textgears,
        'text': plain,
    }
    data = urllib.parse.urlencode(data).encode(encoding='ascii')
    request = urllib.request.Request(textgears_server, data=data)
    try:
        # 'with' closes the reply even if read() fails
        with urllib.request.urlopen(request) as reply:
            out = reply.read()
    except Exception:
        # KeyboardInterrupt / SystemExit are deliberately not caught
        tex2txt.fatal('error connecting to "' + textgears_server + '"')

    out = out.decode(encoding='utf-8')
    try:
        dic = json_decoder.decode(out)
    except Exception:
        json_fatal('JSON root element')

    def f(err):
        """Convert one TextGears error entry."""
        offset = json_get(err, 'offset', int)
        length = json_get(err, 'length', int)
        return {
            'message': 'Error type: ' + json_get(err, 'type', str),
            'offset': offset,
            'length': length,
            'context': checks.create_context(plain, offset, length),
            'replacements': [{'value': r}
                             for r in json_get(err, 'better', list)],
            'rule': {
                'id': 'Not available'
            },
        }

    return [f(err) for err in json_get(dic, 'errors', list)]
def check_server():
    """Return True if the local LanguageTool server answers a request.

    A positive result is remembered in the global flag
    ``ltserver_local_running``, so later calls return immediately.
    The name ``language`` is not a parameter; it is taken from the
    surrounding scope.

    An HTTP error reply is reported through tex2txt.fatal(); any other
    ordinary error while connecting yields False.
    """
    global ltserver_local_running
    if ltserver_local_running:
        return True
    # NB: we need at least one character to check (issue #57)
    data = {'text': ' ', 'language': language}
    data = urllib.parse.urlencode(data).encode(encoding='ascii')
    request = urllib.request.Request(ltserver_local, data=data)
    try:
        reply = urllib.request.urlopen(request)
        reply.close()
        ltserver_local_running = True
        return True
    except urllib.error.HTTPError as e:
        # as we have no real text, this is probably a wrong language code
        tex2txt.fatal('The server couldn\'t fulfill the request;'
                      + ' error code: ' + repr(e.code)
                      + '\n(probably an unknown language code)')
    except Exception:
        # Server not reachable.  KeyboardInterrupt and SystemExit are
        # deliberately not caught here, so the user can still abort.
        return False
def json_fatal(item):
    """Report a problem with the given (sub-)item of the proofreader's
    JSON output through tex2txt.fatal().

    item: name of the JSON element that could not be read (a string).
    """
    message = ('error reading JSON output from proofreader, (sub-)item "'
               + item + '"')
    tex2txt.fatal(message)
# Post-processing of the parsed command line: derive the LanguageTool
# commands, fill in defaults, check the option combination, and build the
# regular-expression alternatives for equation replacements.

# command for a local LT run: user-supplied command or the default one,
# always asking for JSON output in UTF-8
ltcommand = ((cmdline.lt_command or default_option_lt_command)
             + ' --json --encoding utf-8')
# a user-supplied command also replaces the command for the local server
if cmdline.lt_command:
    ltserver_local_cmd = (cmdline.lt_command + ' --http --port '
                          + str(ltserver_local_port))
# working directory for LT: current directory together with a
# user-supplied command, otherwise the default directory
if not cmdline.lt_directory:
    if cmdline.lt_command:
        cmdline.lt_directory = '.'
    else:
        cmdline.lt_directory = default_option_lt_directory

if cmdline.context < 0:
    # huge context: display whole text
    cmdline.context = int(1e8)

# an input file is required, except when acting as server or when only
# stopping the server
if not (cmdline.file or cmdline.as_server or cmdline.server == 'stop'):
    tex2txt.fatal('no input file given')
if cmdline.plain_input and (cmdline.include or cmdline.replace):
    tex2txt.fatal('cannot handle --plain-input together with'
                  + ' --include or --replace')

# language-specific settings for the selected language
lc = parameters.Parameters(cmdline.language).lang_context
# Alternatives joined by '|'; set() removes duplicates, so the order of
# the alternatives is unspecified.
equation_replacements_display = r'|'.join(set(
    lc.math_repl_display + lc.math_repl_display_vowel))
equation_replacements_inline = r'|'.join(set(
    lc.math_repl_inline + lc.math_repl_inline_vowel))
equation_replacements = r'|'.join(set(
    lc.math_repl_display + lc.math_repl_display_vowel
    + lc.math_repl_inline + lc.math_repl_inline_vowel))

# a trailing '||' in the single-letters option selects all equation
# replacements
# NOTE(review): the use of 'repls' is not visible in this excerpt
if cmdline.single_letters and cmdline.single_letters.endswith('||'):
    repls = (lc.math_repl_display + lc.math_repl_display_vowel
             + lc.math_repl_inline + lc.math_repl_inline_vowel)
def run_languagetool(plain, language, disable, enable,
                     disablecategories, enablecategories, lt_options):
    """Run LanguageTool on plain text and return its list of matches.

    Depending on ``cmdline.server``, the text is either POSTed to an LT
    HTTP server (``ltserver`` for 'lt', otherwise ``ltserver_local``) or
    piped into a local LT installation started with ``ltcommand``.

    plain: text to be checked.
    language: language code.
    disable, enable: rule lists, passed on unchanged if non-empty.
    disablecategories, enablecategories: category lists, passed on
        unchanged if non-empty.
    lt_options: list of further LT command-line options; for a server
        request they are translated with the help of ``lt_option_map``.

    Returns the 'matches' list from the decoded JSON answer.  Problems are
    reported through tex2txt.fatal() / json_fatal().
    """
    if cmdline.server:
        # use Web server hosted by LT or local server
        server = ltserver if cmdline.server == 'lt' else ltserver_local
        data = {'text': plain, 'language': language}
        if disable:
            data['disabledRules'] = disable
        if enable:
            data['enabledRules'] = enable
        if disablecategories:
            data['disabledCategories'] = disablecategories
        if enablecategories:
            data['enabledCategories'] = enablecategories
        if lt_options:
            # translate options to entries in HTML request
            ltopts = lt_options
            for opt in lt_option_map:
                # entry[0]: command-line spellings of the option,
                # entry[1]: true if the option takes an argument
                entry = lt_option_map[opt]
                if not any(s in ltopts for s in entry[0]):
                    continue
                idx = max(ltopts.index(s) for s in entry[0] if s in ltopts)
                if entry[1]:
                    # argument is the next list element, if present
                    data[opt] = (ltopts[idx + 1]
                                 if idx + 1 < len(ltopts) else '')
                else:
                    data[opt] = 'true'
        data = urllib.parse.urlencode(data).encode(encoding='ascii')
        request = urllib.request.Request(server, data=data)
        # second attempt is only reached for server 'my', after trying to
        # start the local server
        for _ in range(2):
            try:
                # 'with' closes the reply even if read() fails
                with urllib.request.urlopen(request) as reply:
                    out = reply.read()
                break
            except Exception:
                if cmdline.server != 'my':
                    tex2txt.fatal('error connecting to "' + server + '"')
                start_local_lt_server(language)
        else:
            # both attempts failed: report it instead of continuing
            # with 'out' unbound
            tex2txt.fatal('error connecting to "' + server + '"')
    else:
        # use local installation
        lt_cmd = ltcommand.split() + ['--language', language]
        if disable:
            lt_cmd += ['--disable', disable]
        if enable:
            lt_cmd += ['--enable', enable]
        if disablecategories:
            lt_cmd += ['--disablecategories', disablecategories]
        if enablecategories:
            lt_cmd += ['--enablecategories', enablecategories]
        lt_cmd += lt_options
        lt_cmd.append('-')      # read from stdin
        try:
            proc = subprocess.run(lt_cmd, cwd=cmdline.lt_directory,
                                  input=plain.encode('utf-8'),
                                  stdout=subprocess.PIPE)
            out = proc.stdout
        except Exception:
            # KeyboardInterrupt / SystemExit are deliberately not caught
            tex2txt.fatal('error running ' + repr(' '.join(lt_cmd))
                          + ' in directory ' + repr(cmdline.lt_directory))

    out = out.decode(encoding='utf-8')
    try:
        dic = json_decoder.decode(out)
    except Exception:
        json_fatal('JSON root element')
    matches = json_get(dic, 'matches', list)
    return matches
def f(m):
    """Return the absolute value of the charmap_tot entry that belongs to
    the 'offset' of proofreader match m.

    An offset outside of charmap_tot is reported through tex2txt.fatal().
    """
    offset = json_get(m, 'offset', int)
    inside = 0 <= offset < len(charmap_tot)
    if not inside:
        tex2txt.fatal('run_proofreader():'
                      + ' bad message read from proofreader')
    entry = charmap_tot[offset]
    return abs(entry)
def generate_html(tex, charmap, matches, file):
    """Create the HTML report part for one file.

    tex: text from which the displayed and highlighted parts are cut.
    charmap: sequence indexed by the offsets reported by the proofreader.
        abs(entry) - 1 is used as start position in tex, abs(entry) as
        end position; a negative entry marks the position as unsure.
    matches: list of proofreader messages; 'offset' and 'length' of each
        one are read with json_get().
    file: file name; used in the title and, unchanged, as HTML anchor.

    Returns the tuple (title, anchor, HTML text, number of matches).
    Messages with offsets outside of charmap are reported through
    tex2txt.fatal().
    """
    s = 'File "' + file + '" with ' + str(len(matches)) + ' problem(s)'
    title = protect_html(s)
    anchor = file
    anchor_overlap = file + '-@@@'
    prefix = '<a id="' + anchor + '"></a><H3>' + title + '</H3>\n'

    # collect data for highlighted places
    #
    hdata = []
    for m in matches:
        beg = json_get(m, 'offset', int)
        # treat a length below 1 like length 1
        end = beg + max(1, json_get(m, 'length', int))
        if beg < 0 or end < 0 or beg >= len(charmap) or end >= len(charmap):
            tex2txt.fatal('generate_html():'
                          + ' bad message read from proofreader')
        h = tex2txt.Aux()
        # unsure if first or last matched position carries a negative entry
        h.unsure = (charmap[beg] < 0 or charmap[max(beg, end - 1)] < 0)
        h.beg = abs(charmap[beg]) - 1
        h.end = abs(charmap[max(beg, end - 1)])     # see issue #21
        # unsure or empty range: highlight a single character only
        if h.unsure or h.end <= h.beg:
            h.end = h.beg + 1

        if h.end == h.beg + 1 and tex[h.beg] == '\\':
            # HACK:
            # if matched a single \ that is actually followed by macro name:
            # also highlight the macro name
            h.end = h.beg + utils.correct_mark_macroname(h.beg, 1, tex)
        elif h.unsure and tex[h.beg].isalpha():
            # HACK:
            # if unsure: mark till end of word (only letters)
            s = re.search(r'\A.[^\W0-9_]+', tex[h.beg:])
            if s:
                h.end = h.beg + len(s.group(0))

        # zero-based number of first line, and number of last line plus one
        h.beglin = tex.count('\n', 0, h.beg)
        h.endlin = tex.count('\n', 0, h.end) + 1
        # h.lin keeps the line of the match, h.beglin is widened below
        h.lin = h.beglin
        h.m = m
        hdata.append(h)

    # group adjacent matches into regions
    #
    regions = []
    starts = tex2txt.get_line_starts(tex)
    for h in hdata:
        # widen the line range by cmdline.context lines on both sides,
        # clamped to the valid indices of starts
        h.beglin = max(h.beglin - cmdline.context, 0)
        h.endlin = min(h.endlin + cmdline.context, len(starts) - 1)
        if not regions or h.beglin >= max(h.endlin for h in regions[-1]):
            # start a new region
            regions.append([h])
        else:
            # match is part of last region
            regions[-1].append(h)

    # produce output
    #
    res_tot = ''
    overlaps = []
    line_numbers = []
    for reg in regions:
        #
        # generate output for one region:
        # collect all matches in that region
        #
        beglin = reg[0].beglin
        endlin = max(h.endlin for h in reg)
        res = ''
        # position in tex up to which the output has been generated
        last = starts[beglin]
        for h in reg:
            s = generate_highlight(h.m, tex[h.beg:h.end], h.lin + 1,
                                   h.unsure)
            if h.beg < last:
                # overlapping with last message
                overlaps.append((s, h.lin + 1))
                continue
            res += protect_html(tex[last:h.beg])
            res += s
            last = h.end

        res += protect_html(tex[last:starts[endlin]])
        res_tot += res + '<br>\n'
        # the -1 entry belongs to the '<br>' line appended after the region
        line_numbers += list(range(beglin, endlin)) + [-1]

    if not line_numbers:
        # no problems found: just display first cmdline.context lines
        endlin = min(cmdline.context, len(starts) - 1)
        res_tot = protect_html(tex[:starts[endlin]])
        line_numbers = list(range(endlin))
    if line_numbers:
        res_tot = add_line_numbers(res_tot, line_numbers)

    # overlapping messages are listed in a separate table after the text;
    # a link to this table is added to the prefix
    postfix = ''
    if overlaps:
        prefix += ('<a href="#' + anchor_overlap + '">'
                   + '<H3>Overlapping message(s) found:'
                   + ' see here</H3></a>\n')
        postfix = ('<a id="' + anchor_overlap + '"></a><H3>'
                   + protect_html('File "' + file + '":')
                   + ' overlapping message(s)</H3>\n')
        postfix += '<table cellspacing="0">\n'
        for (s, lin) in overlaps:
            postfix += ('<tr><td style="' + number_style
                        + '" align="right" valign="top">' + str(lin)
                        + ' </td><td>' + s + '</td></tr>\n')
        postfix += '</table>\n'

    return (title, anchor, prefix + res_tot + postfix, len(matches))