def translationByLookup(grammar, language, tgtlanguages, sentence):
    """Translate *sentence* from *language* into each of *tgtlanguages*,
    splitting around unparseable tokens.

    The sentence is tokenized on whitespace and processed as a work queue of
    chunks.  Each chunk is parsed whole; when the parser raises
    ``pgf.ParseError`` naming an unseen token, the chunk is split into
    (prefix, [token], suffix) and those pieces are re-queued.  Single unknown
    words fall back to ``translateWord``.

    Returns a dict mapping each target language to the list of post-processed
    translation chunks, in sentence order.
    """
    parser = grammar.languages[language].parse
    # One linearizer per target language, looked up once up front.
    linearizers = dict((lang, grammar.languages[lang].linearize)
                       for lang in tgtlanguages)
    queue = [sentence.strip().split()]
    transChunks = {}
    while queue:
        head = queue[0]
        if not head:
            # Empty chunk (e.g. split at the first/last token) — nothing to do.
            pass
        elif len(head) == 1 and head[0].strip():
            # Single out-of-grammar word: fall back to word-level lookup.
            for lang, wordchoice in translateWord(grammar, language,
                                                 tgtlanguages, head[0]):
                transChunks.setdefault(lang, []).append(
                    gf_utils.postprocessor(wordchoice))
        else:
            try:
                for parseidx, parse in enumerate(parser(' '.join(head))):
                    for lang in tgtlanguages:
                        # Linearize once and reuse; the original called the
                        # linearizer twice per language.
                        lin = linearizers[lang](parse[1])
                        if lin is None:
                            transChunks.setdefault(lang, []).append(' ')
                        else:
                            transChunks.setdefault(lang, []).append(
                                gf_utils.postprocessor(lin.strip()))
                    # Only the best (first) parse is used.
                    break
            except pgf.ParseError as err:
                # The parser reports the offending token quoted at the end of
                # the message, e.g.: ... "token"
                # NOTE(review): assumes err.message ends with the quoted
                # unseen token — confirm against the pgf version in use.
                unseenToken = err.message.strip().split()[-1][1:-1]
                idx = head.index(unseenToken)
                # Split the chunk around the unseen token and re-queue the
                # three pieces right after the current head.
                queue.insert(1, head[:idx])
                queue.insert(2, [head[idx]])
                queue.insert(3, head[idx + 1:])
        del queue[0]
    # BUG FIX: the original built transChunks but never returned it, so the
    # whole computation was discarded and callers always received None.
    return transChunks
def translateWordsAsChunks(grammar, language, tgtlanguages, word):
    """Parse *word* in *language* and linearize it into each target language.

    Only the best (first) parse is used.  Returns a list of
    ``(lang, postprocessed_translation)`` pairs, or ``[]`` when *word* does
    not parse.
    """
    parser = grammar.languages[language].parse
    # One linearizer per target language, looked up once up front.
    linearizers = dict((lang, grammar.languages[lang].linearize)
                       for lang in tgtlanguages)
    translations = []
    try:
        for parseidx, parse in enumerate(parser(word)):
            for lang in tgtlanguages:
                trans = linearizers[lang](parse[1])
                translations.append((
                    lang,
                    gf_utils.postprocessor(trans.strip() if trans else '')))
            # Only the first parse is used.
            break
    except pgf.ParseError:
        # Word is outside the grammar: no translations available.
        return []
    # BUG FIX: the original had no return here, so a *successful* parse
    # returned None while a failed parse returned [] — callers iterating the
    # result would crash exactly when translation succeeded.
    return translations