def searchPath(rootpath, include_pairs=True, verbosity=1):
    """Scan ``rootpath`` for Apertium ``.mode`` files and classify them by type.

    Args:
        rootpath: directory tree to walk (symlinks are followed; cycles pruned).
        include_pairs: when True, translation-pair modes are collected too.
        verbosity: when > 1, log every mode found via _log_modes().

    Returns:
        Dict mapping mode type ('pair', 'analyzer', 'generator', 'tagger',
        'spell', 'tokenise') to a list of 3-tuples:
          'pair' entries:     (path-to-.mode-file, src_lang, trg_lang)
          all other entries:  (parent-dir-of-modes-dir, modename, lang_pair)
        Language codes are normalized with toAlpha3Code().
    """
    lang_code = r'[a-z]{2,3}(?:_[A-Za-z]+)?'
    type_re = {
        'pair': re.compile(r'({0})-({0})\.mode'.format(lang_code)),
        'analyzer': re.compile(r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)),
        # Fix: [A-Za-z] replaces the original [A-z], which also matched the
        # non-letter characters [ \ ] ^ _ ` lying between 'Z' and 'a' in ASCII.
        'generator': re.compile(r'(({0}(-{0})?)-gener[A-Za-z]*)\.mode'.format(lang_code)),
        'tagger': re.compile(r'(({0}(-{0})?)-tagger)\.mode'.format(lang_code)),
        'spell': re.compile(r'(({0}(-{0})?)-spell)\.mode'.format(lang_code)),
        'tokenise': re.compile(r'(({0}(-{0})?)-tokenise)\.mode'.format(lang_code)),
    }
    modes = {mtype: [] for mtype in type_re}  # type: Dict[str, List[Tuple[str, str, str]]]
    real_root = os.path.abspath(os.path.realpath(rootpath))
    for dirpath, dirnames, files in os.walk(rootpath, followlinks=True):
        # Prune symlink cycles so followlinks=True cannot loop forever.
        if is_loop(dirpath, rootpath, real_root):
            dirnames[:] = []
            continue
        for filename in files:
            if not filename.endswith('.mode'):
                continue
            # No break on match: a filename may be tested against every type,
            # exactly as the original did.
            for mtype, regex in type_re.items():
                m = regex.match(filename)
                if not m:
                    continue
                if mtype != 'pair':
                    modename = m.group(1)  # e.g. en-es-anmorph
                    langlist = [toAlpha3Code(l) for l in m.group(2).split('-')]
                    lang_pair = '-'.join(langlist)  # e.g. en-es
                    dir_of_modes = os.path.dirname(dirpath)
                    modes[mtype].append((dir_of_modes, modename, lang_pair))
                elif include_pairs:
                    modes[mtype].append((os.path.join(dirpath, filename),
                                         toAlpha3Code(m.group(1)),
                                         toAlpha3Code(m.group(2))))
    if verbosity > 1:
        _log_modes(modes)
    return modes
def searchPath(rootpath, include_pairs=True, verbosity=1):
    """Walk ``rootpath`` for Apertium ``.mode`` files and classify them.

    Args:
        rootpath: directory tree to walk (symlinks followed; cycles pruned).
        include_pairs: when True, translation-pair modes are collected too.
        verbosity: when > 1, log every mode found.

    Returns:
        Dict mapping mode type ('pair', 'analyzer', 'generator', 'tagger')
        to a list of 3-tuples:
          'pair' entries:     (path-to-.mode-file, src_lang, trg_lang)
          all other entries:  (parent-dir-of-modes-dir, modename, lang_pair)
        Language codes are normalized with toAlpha3Code().
    """
    lang_code = r'[a-z]{2,3}(?:_[A-Za-z]+)?'
    type_re = {
        'pair': re.compile(r'({0})-({0})\.mode'.format(lang_code)),
        'analyzer': re.compile(r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)),
        # Fix: [A-Za-z] replaces the original [A-z], which also matched the
        # non-letter characters [ \ ] ^ _ ` lying between 'Z' and 'a' in ASCII.
        'generator': re.compile(r'(({0}(-{0})?)-gener[A-Za-z]*)\.mode'.format(lang_code)),
        'tagger': re.compile(r'(({0}(-{0})?)-tagger)\.mode'.format(lang_code)),
    }
    modes = {'pair': [], 'analyzer': [], 'generator': [], 'tagger': []}
    real_root = os.path.abspath(os.path.realpath(rootpath))
    for dirpath, dirnames, files in os.walk(rootpath, followlinks=True):
        # Prune symlink cycles so followlinks=True cannot loop forever.
        if is_loop(dirpath, rootpath, real_root):
            dirnames[:] = []
            continue
        for filename in [f for f in files if f.endswith('.mode')]:
            for mtype, regex in type_re.items():
                m = regex.match(filename)
                if not m:
                    continue
                if mtype != 'pair':
                    modename = m.group(1)  # e.g. en-es-anmorph
                    # (the original also computed an unused lang_src here)
                    langlist = [toAlpha3Code(l) for l in m.group(2).split('-')]
                    lang_pair = '-'.join(langlist)  # e.g. en-es
                    dir_of_modes = os.path.dirname(dirpath)
                    modes[mtype].append((dir_of_modes, modename, lang_pair))
                elif include_pairs:
                    modes[mtype].append((os.path.join(dirpath, filename),
                                         toAlpha3Code(m.group(1)),
                                         toAlpha3Code(m.group(2))))
    if verbosity > 1:
        for mtype, found in modes.items():
            if found:
                # Lazy %-style args: the message is only formatted when the
                # INFO level is actually enabled.
                logging.info('"%s" modes found:\n%s', mtype,
                             '\n'.join('\t'.join(m) for m in found))
    return modes
def get(self):
    """Return the analyzer coverage of ?q= for the language in ?lang=."""
    mode = toAlpha3Code(self.get_argument('lang'))
    text = self.get_argument('q')
    if not text:
        self.send_error(400, explanation='Missing q argument')
        return
    if mode not in self.analyzers:
        self.send_error(400, explanation='That mode is not installed')
        return

    analyzer_path = self.analyzers[mode][0]
    analyzer_mode = self.analyzers[mode][1]
    pool = Pool(processes=1)
    pending = pool.apply_async(getCoverage, [text, analyzer_path, analyzer_mode])
    pool.close()

    @run_async_thread
    def run_coverage(callback):
        # A None result signals a timeout to the response logic below.
        try:
            callback(pending.get(timeout=self.timeout))
        except TimeoutError:
            pool.terminate()
            callback(None)

    coverage = yield tornado.gen.Task(run_coverage)
    if coverage is None:
        self.send_error(408, explanation='Request timed out')
    else:
        self.sendResponse([coverage])
def get(self):
    """Generate surface forms for the lexical units in ?q= using ?lang=."""
    mode = toAlpha3Code(self.get_argument('lang'))
    to_generate = self.get_argument('q')

    if mode not in self.generators:
        self.send_error(400, explanation='That mode is not installed')
        return

    # Pull out the ^...$ lexical units; if none are present, wrap the raw query.
    lexical_units = re.findall(r'(\^[^\$]*\$[^\^]*)', to_generate)
    if len(lexical_units) == 0:
        lexical_units = ['^%s$' % to_generate]

    pool = Pool(processes=1)
    pending = pool.apply_async(apertium,
                               ('[SEP]'.join(lexical_units),
                                self.generators[mode][0],
                                self.generators[mode][1]),
                               {'formatting': 'none'})
    pool.close()

    @run_async
    def run_generation(callback):
        # A None result signals a timeout to the response logic below.
        try:
            callback(pending.get(timeout=self.timeout))
        except TimeoutError:
            pool.terminate()
            callback(None)

    generated = yield tornado.gen.Task(run_generation)
    if generated is None:
        self.send_error(408, explanation='Request timed out')
        return
    generated = removeLast(to_generate, generated)
    # Pair each generated piece with the lexical unit it came from.
    self.sendResponse([(piece, lexical_units[idx])
                       for idx, piece in enumerate(generated.split('[SEP]'))])
def get(self):
    """Morphologically analyze ?q= with the analyzer for ?lang=."""
    mode = toAlpha3Code(self.get_argument('lang'))
    to_analyze = self.get_argument('q')

    if mode not in self.analyzers:
        self.send_error(400, explanation='That mode is not installed')
        return

    pool = Pool(processes=1)
    pending = pool.apply_async(apertium,
                               [to_analyze,
                                self.analyzers[mode][0],
                                self.analyzers[mode][1]])
    pool.close()

    @run_async
    def run_analysis(callback):
        # A None result signals a timeout to the response logic below.
        try:
            callback(pending.get(timeout=self.timeout))
        except TimeoutError:
            pool.terminate()
            callback(None)

    analysis = yield tornado.gen.Task(run_analysis)
    if analysis is None:
        self.send_error(408, explanation='Request timed out')
        return
    units = removeLast(to_analyze, re.findall(r'\^([^\$]*)\$([^\^]*)', analysis))
    # Each response entry: (full analysis, surface form + trailing blanks).
    self.sendResponse([(unit[0], unit[0].split('/')[0] + unit[1])
                       for unit in units])
def get(self):
    """Identify the language of ?q=, preferring CLD2 when available.

    Falls back to coverage-based detection over all installed analyzers
    when the cld2 module is not present.
    """
    text = self.get_argument('q')
    if not text:
        return self.send_error(400, explanation='Missing q argument')
    if cld2:
        cld_results = cld2.detect(text)
        if cld_results[0]:  # presumably the isReliable flag — confirm against pycld2
            possible_langs = filter(lambda x: x[1] != 'un', cld_results[2])
            self.sendResponse({toAlpha3Code(possible_lang[1]): possible_lang[2]
                               for possible_lang in possible_langs})
        else:
            self.sendResponse({'nob': 100})  # TODO: Some more reasonable response
    else:
        pool = Pool(processes=1)
        result = pool.apply_async(getCoverages, [text, self.analyzers],
                                  {'penalize': True})
        pool.close()
        try:
            coverages = result.get(timeout=self.timeout)
            # Fix: the original relied on an apply_async callback and, per its
            # own TODO, the coverages were never actually sent to the client.
            # Send them explicitly from the request handler instead.
            self.sendResponse(coverages)
        except TimeoutError:
            self.send_error(408, explanation='Request timed out')
            pool.terminate()
def get(self):
    """Guess the language of ?q= (CLD2 if installed, else analyzer coverage)."""
    text = self.get_argument('q')
    if not text:
        return self.send_error(400, explanation='Missing q argument')
    if cld2:
        cldResults = cld2.detect(text)
        if cldResults[0]:  # presumably the isReliable flag — confirm against pycld2
            possibleLangs = filter(lambda x: x[1] != 'un', cldResults[2])
            self.sendResponse({
                toAlpha3Code(possibleLang[1]): possibleLang[2]
                for possibleLang in possibleLangs
            })
        else:
            self.sendResponse({'nob': 100})  # TODO: Some more reasonable response
    else:
        pool = Pool(processes=1)
        result = pool.apply_async(getCoverages, [text, self.analyzers],
                                  {'penalize': True})
        pool.close()
        try:
            coverages = result.get(timeout=self.timeout)
            # Fix: the original's apply_async callback never delivered the
            # coverages to the client (its own TODO admitted as much); respond
            # directly from the handler.
            self.sendResponse(coverages)
        except TimeoutError:
            self.send_error(408, explanation='Request timed out')
            pool.terminate()
def get(self):
    """Compute analyzer coverage of the query text for the requested language."""
    lang = toAlpha3Code(self.get_argument('lang'))
    query = self.get_argument('q')
    if not query:
        self.send_error(400, explanation='Missing q argument')
        return

    if lang in self.analyzers:
        pool = Pool(processes=1)
        pending = pool.apply_async(getCoverage,
                                   [query,
                                    self.analyzers[lang][0],
                                    self.analyzers[lang][1]])
        pool.close()

        @run_async_thread
        def compute(callback):
            # Timeouts are reported as None and handled after the yield.
            try:
                callback(pending.get(timeout=self.timeout))
            except TimeoutError:
                pool.terminate()
                callback(None)

        coverage = yield tornado.gen.Task(compute)
        if coverage is not None:
            self.sendResponse([coverage])
        else:
            self.send_error(408, explanation='Request timed out')
    else:
        self.send_error(400, explanation='That mode is not installed')
def get(self):
    """Analyze ?q= by running 'apertium -f txt' in the mode for ?lang=."""
    query = self.get_argument('q')
    lang = toAlpha3Code(self.get_argument('lang'))
    if lang not in self.analyzers:
        self.send_error(400, explanation='That mode is not installed')
        return
    path, mode = self.analyzers[lang]
    commands = [['apertium', '-d', path, '-f', 'txt', mode]]
    raw = yield translation.translateSimple(query, commands)
    self.sendResponse(self.postproc_text(query, raw))
def get(self):
    """Generate surface forms from ?q= using the generator for ?lang=."""
    query = self.get_argument('q')
    lang = toAlpha3Code(self.get_argument('lang'))
    if lang not in self.generators:
        self.send_error(400, explanation='That mode is not installed')
        return
    path, mode = self.generators[lang]
    commands = [['apertium', '-d', path, '-f', 'none', mode]]
    lexical_units, to_generate = self.preproc_text(query)
    raw = yield translation.translateSimple(to_generate, commands)
    self.sendResponse(self.postproc_text(lexical_units, raw))
def get(self):
    """Per-word lookup: run the requested modes over ?q= and pair outputs per unit.

    Query args:
        lang:  language code (normalized to alpha-3).
        modes: space-separated subset of morph/biltrans/tagger/disambig/translate.
        q:     text to process.
        pos:   optional 1-based word position; respond with only that unit.
    """
    lang = toAlpha3Code(self.get_argument('lang'))
    modes = set(self.get_argument('modes').split(' '))
    query = self.get_argument('q')
    if not modes <= {'morph', 'biltrans', 'tagger', 'disambig', 'translate'}:
        self.send_error(400, explanation='Invalid mode argument')
        return

    def handleOutput(output):
        # (Fix: the original carried a large block of dead commented-out code
        # here as a no-op triple-quoted string; it has been removed.)
        # NOTE(review): the timeout path below delivers output=None, which maps
        # to 400 'No output' rather than the 408 branch — confirm this is the
        # intended mapping before relying on it.
        if output is None:
            self.send_error(400, explanation='No output')
            return
        elif not output:
            self.send_error(408, explanation='Request timed out')
            return
        outputs, tagger_lexicalUnits, morph_lexicalUnits = output
        units = tagger_lexicalUnits if tagger_lexicalUnits else morph_lexicalUnits
        toReturn = []
        for index, lexicalUnit in enumerate(units):
            unitToReturn = {'input': stripTags(lexicalUnit.split('/')[0])}
            for mode in modes:
                unitToReturn[mode] = outputs[mode][index]
            toReturn.append(unitToReturn)
        if self.get_argument('pos', default=None):
            requestedPos = int(self.get_argument('pos')) - 1
            currentPos = 0
            for unit in toReturn:
                # (renamed from 'input', which shadowed the builtin)
                unit_input = unit['input']
                currentPos += len(unit_input.split(' '))
                if requestedPos < currentPos:
                    self.sendResponse(unit)
                    return
            # Fix: an out-of-range pos previously sent no response at all,
            # leaving the request hanging until the client gave up.
            self.send_error(400, explanation='Invalid pos argument')
        else:
            self.sendResponse(toReturn)

    pool = Pool(processes=1)
    result = pool.apply_async(processPerWord,
                              (self.analyzers, self.taggers, lang, modes, query))
    pool.close()

    @run_async_thread
    def worker(callback):
        try:
            callback(result.get(timeout=self.timeout))
        except TimeoutError:
            pool.terminate()
            callback(None)

    output = yield tornado.gen.Task(worker)
    handleOutput(output)
def get(self):
    """Run the requested per-word modes over ?q= and return results per unit.

    Query args:
        lang:  language code (normalized to alpha-3).
        modes: space-separated subset of morph/biltrans/tagger/disambig/translate.
        q:     text to process.
        pos:   optional 1-based word position; respond with only that unit.
    """
    lang = toAlpha3Code(self.get_argument('lang'))
    modes = set(self.get_argument('modes').split(' '))
    query = self.get_argument('q')
    if not modes <= {
        'morph', 'biltrans', 'tagger', 'disambig', 'translate'
    }:
        self.send_error(400, explanation='Invalid mode argument')
        return

    def handleOutput(output):
        # (Fix: removed the original's dead commented-out code, which survived
        # as a no-op triple-quoted string expression.)
        # NOTE(review): the worker reports timeouts as output=None, which lands
        # in the 400 'No output' branch rather than the 408 branch — confirm
        # which mapping is intended.
        if output is None:
            self.send_error(400, explanation='No output')
            return
        elif not output:
            self.send_error(408, explanation='Request timed out')
            return
        outputs, tagger_lexicalUnits, morph_lexicalUnits = output
        units = tagger_lexicalUnits if tagger_lexicalUnits else morph_lexicalUnits
        toReturn = []
        for index, lexicalUnit in enumerate(units):
            unitToReturn = {'input': stripTags(lexicalUnit.split('/')[0])}
            for mode in modes:
                unitToReturn[mode] = outputs[mode][index]
            toReturn.append(unitToReturn)
        if self.get_argument('pos', default=None):
            requestedPos = int(self.get_argument('pos')) - 1
            currentPos = 0
            for unit in toReturn:
                # (renamed from 'input', which shadowed the builtin)
                unit_input = unit['input']
                currentPos += len(unit_input.split(' '))
                if requestedPos < currentPos:
                    self.sendResponse(unit)
                    return
            # Fix: previously an out-of-range pos produced no response at all,
            # so the request hung until the client timed out.
            self.send_error(400, explanation='Invalid pos argument')
        else:
            self.sendResponse(toReturn)

    pool = Pool(processes=1)
    result = pool.apply_async(
        processPerWord,
        (self.analyzers, self.taggers, lang, modes, query))
    pool.close()

    @run_async_thread
    def worker(callback):
        try:
            callback(result.get(timeout=self.timeout))
        except TimeoutError:
            pool.terminate()
            callback(None)

    output = yield tornado.gen.Task(worker)
    handleOutput(output)