Пример #1
0
    def handle_translation(self, request_id):
        """
        Handler connecting to the Microsoft Translator service.

        Requires a Bing AppID as documented at MSDN:
        - http://msdn.microsoft.com/en-us/library/ff512421.aspx

        The serialized TranslationRequestMessage is read from
        /tmp/<request_id>.message, its source text is translated in chunks
        of at most self.__batch__ lines via self._batch_translate(), and the
        message (with target_text filled in) is written back to that file.
        """
        # The context manager guarantees the message file is closed even if
        # parsing or one of the translation calls raises.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            source = self.language_code(message.source_language)
            target = self.language_code(message.target_language)

            lines = message.source_text.split('\n')
            batch_size = self.__batch__

            # Walk over the lines in steps of batch_size; the final chunk is
            # simply shorter when the line count is no exact multiple.
            result = u''
            for start in range(0, len(lines), batch_size):
                text = u'\n'.join(lines[start:start + batch_size])
                result += self._batch_translate(source, target, text)
                result += '\n'

            message.target_text = result

            # Overwrite in place and cut off anything left over from a
            # previously longer serialization.
            handle.seek(0)
            handle.write(message.SerializeToString())
            handle.truncate()
Пример #2
0
 def handle_translation(self, request_id):
     """
     Translates text using the Accurat Moses SMT system.

     Reads the serialized TranslationRequestMessage from
     /tmp/<request_id>.message, runs the Moses decoder on the source text
     and writes the message back with target_text set.  Whatever the
     process emitted on stdout/stderr is attached to packet_data under the
     keys 'STDOUT' and 'STDERR'.
     """
     # Context managers make sure every file is closed again, even when
     # parsing, the decoder call or the decoding of the result fails.
     with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
         message = TranslationRequestMessage()
         message.ParseFromString(handle.read())

         # First, we write out the source text to file.
         with open('/tmp/{0}.source'.format(request_id), 'w') as source:
             source.write(message.source_text.encode('utf-8'))

             # Check if the last line ends with a line break, otherwise
             # Moses I/O implementation does not accept the input!
             if not message.source_text.endswith('\n'):
                 source.write('\n')

         source_language = self.language_code(message.source_language)
         target_language = self.language_code(message.target_language)

         # This is a special instance of the Moses worker, with pre-defined
         # knowledge about the ACCURAT Moses configurations.  We use this
         # approach to ensure that only one Moses process at a time can be
         # started; by doing so, we can avoid memory issues.
         MOSES_CMD = '/share/accurat/run/wmt10/bin/moses-irstlm/mosesdecoder' \
           '/mosesdecoder/moses-cmd/src/moses'

         MOSES_CONFIG = '/share/accurat/mtserver/accurat/{0}-{1}/' \
           'moses.ini.bin'.format(source_language, target_language)

         # Then, we invoke the Moses command reading from the source file
         # and writing to a target file, also inside /tmp.  This blocks
         # until the Moses process finishes.
         shell_cmd = "{0} -f {1} < /tmp/{2}.source > /tmp/{3}.target"
         shell_cmd = shell_cmd.format(MOSES_CMD, MOSES_CONFIG, request_id,
           request_id)

         # NOTE(review): the shell redirects stdout into the .target file,
         # so proc_stdout is expected to be empty -- confirm this is
         # intended.
         proc_stdout, proc_stderr = Popen(shell_cmd, shell=True,
           stdout=PIPE, stderr=PIPE).communicate()

         # Wait for some time to ensure file I/O is completed.
         sleep(2)

         # We can now load the translation from the target file.
         with open('/tmp/{0}.target'.format(request_id), 'r') as target:
             message.target_text = unicode(target.read(), 'utf-8')

         keyvalue = message.packet_data.add()
         keyvalue.key = 'STDOUT'
         keyvalue.value = proc_stdout

         keyvalue = message.packet_data.add()
         keyvalue.key = 'STDERR'
         keyvalue.value = proc_stderr

         # Overwrite in place and drop any stale bytes of a previously
         # longer serialization.
         handle.seek(0)
         handle.write(message.SerializeToString())
         handle.truncate()
Пример #3
0
    def handle_translation(self, request_id):
        """
        Translates text using the Moses SMT system.

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message, pipes its source text through the Moses
        binary configured as self.MOSES_CMD / self.MOSES_CONFIG and writes
        the message back with target_text set to the decoder output.
        """
        # Context managers make sure every file is closed again, even when
        # one of the intermediate steps raises.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            # First, we write out the source text to file.
            with open('/tmp/{0}.source'.format(request_id), 'w') as source:
                source.write(message.source_text.encode('utf-8'))

            # Then, we invoke the Moses command reading from the source
            # file and writing to a target file, also inside /tmp.  This
            # blocks until the Moses process finishes.
            shell_cmd = "{0} -f {1} < /tmp/{2}.source > /tmp/{3}.target"
            shell_cmd = shell_cmd.format(self.MOSES_CMD, self.MOSES_CONFIG,
              request_id, request_id)
            process = Popen(shell_cmd, shell=True)
            process.wait()

            # Wait for some time to ensure file I/O is completed.
            sleep(2)

            # We can now load the translation from the target file.
            with open('/tmp/{0}.target'.format(request_id), 'r') as target:
                message.target_text = unicode(target.read(), 'utf-8')

            # Overwrite in place and drop any stale bytes of a previously
            # longer serialization.
            handle.seek(0)
            handle.write(message.SerializeToString())
            handle.truncate()
Пример #4
0
    def handle_translation(self, request_id):
        """
        Translates text using Yahoo! Babel Fish.

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message, requests a translation from the Babel
        Fish web front end and, if a translation can be extracted from the
        returned page, writes the message back with target_text set.  The
        message file is left untouched when nothing could be extracted.
        """
        # The context manager closes the message file even if the HTTP
        # request or the decoding of the response raises.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            source = self.language_code(message.source_language)
            target = self.language_code(message.target_language)

            the_data = urllib.urlencode({
              'lp': '{0}_{1}'.format(source, target),
              'text': message.source_text.encode('utf-8'),
              'ei': 'utf8'})
            the_url = 'http://babelfish.yahoo.com/translate_txt?{0}'.format(
              the_data)
            the_header = {'User-agent': 'Mozilla/5.0'}

            opener = urllib2.build_opener(urllib2.HTTPHandler)
            http_request = urllib2.Request(the_url, None, the_header)
            http_handle = opener.open(http_request)
            try:
                content = http_handle.read()
            finally:
                # Release the connection even if reading fails half-way.
                http_handle.close()

            # The translation is carried in a hidden form field named "p".
            result_exp = re.compile('type="hidden" name="p" value="([^"]+)',
              re.I|re.U)
            result = result_exp.search(content)

            if result:
                message.target_text = unicode(result.group(1), 'latin-1')

                # Overwrite in place and drop any stale bytes of a
                # previously longer serialization.
                handle.seek(0)
                handle.write(message.SerializeToString())
                handle.truncate()
Пример #5
0
    def handle_translation(self, request_id):
        """
        Dummy translation handler that blocks for a random amount of time.

        Returns all-uppercase version of Text as translation.

        The serialized TranslationRequestMessage is read from
        <self.message_path>/<request_id>.message and written back to the
        same file with target_text set to the upper-cased source text.
        """
        # Block for 50 to 149 seconds: random() yields a value in [0, 1),
        # so the random part adds 0..99 on top of the fixed 50.
        interval = 50 + int(random() * 100)
        self.LOGGER.info("Sleeping for {0} seconds...".format(interval))
        sleep(interval)

        # The dummy implementation takes the source text from the request
        # message and stores an upper-cased version of it as target text
        # inside that same message.

        self.LOGGER.debug("Finalizing result for request {0}".format(
          request_id))

        message_file = '{0}/{1}.message'.format(self.message_path,
          request_id)

        # The context manager closes the file even if parsing fails.
        with open(message_file, 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())
            message.target_text = message.source_text.upper()

            # Overwrite in place and drop any stale bytes of a previously
            # longer serialization.
            handle.seek(0)
            handle.write(message.SerializeToString())
            handle.truncate()
Пример #6
0
    def handle_translation(self, request_id):
        """
        Translates text from German->English using the Moses SMT system.

        You have to adapt MOSES_CMD and MOSES_CONFIG to the correct values :)

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message, pipes its source text through Moses and
        writes the message back with target_text set to the decoder output.
        """
        # Context managers make sure every file is closed again, even when
        # one of the intermediate steps raises.
        with open("/tmp/{0}.message".format(request_id), "r+b") as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            # First, we write out the source text to file.
            with open("/tmp/{0}.source".format(request_id), "w") as source:
                source.write(message.source_text.encode("utf-8"))

            # Then, we invoke the Moses command reading from the source
            # file and writing to a target file, also inside /tmp.  This
            # blocks until the Moses process finishes.
            shell_cmd = "{0} -f {1} < /tmp/{2}.source > /tmp/{3}.target"
            shell_cmd = shell_cmd.format(
                MOSES_CMD, MOSES_CONFIG, request_id, request_id
            )
            process = Popen(shell_cmd, shell=True)
            process.wait()

            # We can now load the translation from the target file.
            with open("/tmp/{0}.target".format(request_id), "r") as target:
                message.target_text = unicode(target.read(), "utf-8")

            # Overwrite in place and drop any stale bytes of a previously
            # longer serialization.
            handle.seek(0)
            handle.write(message.SerializeToString())
            handle.truncate()
    def handle_translation(self, request_id):
        """
        Translates text using the Accurat Moses SMT system.

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message, runs the Moses decoder with the
        configuration matching the message's language pair and writes the
        message back with target_text set to the decoder output.
        """
        # Context managers make sure every file is closed again, even when
        # one of the intermediate steps raises.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            # First, we write out the source text to file.
            with open('/tmp/{0}.source'.format(request_id), 'w') as source:
                source.write(message.source_text.encode('utf-8'))

                # Check if the last line ends with a line break, otherwise
                # Moses I/O implementation does not accept the input!
                if not message.source_text.endswith('\n'):
                    source.write('\n')

            source_language = self.language_code(message.source_language)
            target_language = self.language_code(message.target_language)

            # This is a special instance of the Moses worker, with
            # pre-defined knowledge about the ACCURAT Moses configurations.
            # We use this approach to ensure that only one Moses process at
            # a time can be started; by doing so, we can avoid memory
            # issues.
            MOSES_CMD = '/share/accurat/run/wmt10/bin/moses-irstlm/' \
              'mosesdecoder/mosesdecoder/moses-cmd/src/moses'

            MOSES_CONFIG = '/share/accurat/mtserver/accurat/{0}-{1}/' \
              'moses.ini.bin'.format(source_language, target_language)

            # Then, we invoke the Moses command reading from the source
            # file and writing to a target file, also inside /tmp.  This
            # blocks until the Moses process finishes.
            shell_cmd = "{0} -f {1} < /tmp/{2}.source > /tmp/{3}.target"
            shell_cmd = shell_cmd.format(MOSES_CMD, MOSES_CONFIG,
              request_id, request_id)
            process = Popen(shell_cmd, shell=True)
            process.wait()

            # Wait for some time to ensure file I/O is completed.
            sleep(2)

            # We can now load the translation from the target file.
            with open('/tmp/{0}.target'.format(request_id), 'r') as target:
                message.target_text = unicode(target.read(), 'utf-8')

            # Overwrite in place and drop any stale bytes of a previously
            # longer serialization.
            handle.seek(0)
            handle.write(message.SerializeToString())
            handle.truncate()
Пример #8
0
    def handle_translation(self, request_id):
        """
        Translates text from German->English using the Lucy RBMT system.

        Uses the XML-RPC server wrapper running at msv-3207.sb.dfki.de.

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message and writes it back with target_text set
        to the filtered translation.  The unfiltered translation and the
        trees returned by the server are attached to packet_data under the
        keys 'RAW_RESULT' and 'TREES'.

        Raises AssertionError if the server does not report being alive.
        """
        # The context manager closes the message file even if the XML-RPC
        # calls or the post-processing raise.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            proxy = xmlrpclib.ServerProxy('http://msv-3207.sb.dfki.de:9999/')

            # Explicit check instead of an assert statement, which would be
            # stripped when Python runs with -O.  The exception type is
            # kept for existing callers.
            if not proxy.isAlive():
                raise AssertionError('Lucy XML-RPC server is not alive')

            source = self.language_code(message.source_language)
            target = self.language_code(message.target_language)
            content = proxy.lucyTranslate(message.source_text, source,
              target)

            # Results are stored in a field with key: '{EN,ES,DE,FR}.txt'.
            target_key = target[:2]
            if target_key == 'SP':
                target_key = 'ES'
            elif target_key == 'GE':
                target_key = 'DE'

            result = content.get('{0}.txt'.format(target_key))
            trees = content.get('tre')

            # We have to parse the result text and filter out Lucy's
            # alternative translations, e.g.:
            #
            #   The apple does not fall far from the <A[tribe|stem|trunk]>.
            #
            # For this example, we will return "...from the tribe." as
            # target text while the "raw" translation as well as the trees
            # are returned inside the TranslationRequestMessage's
            # packet_data list.
            if result:
                filter_exp = re.compile(r'<.\[(.+?)(\|.+?)?\]>', re.I|re.U)
                filtered_result = filter_exp.sub(r'\g<1>', result)

                # xmlrpclib may hand back a unicode object already (it does
                # so for non-ASCII strings); decoding unicode a second time
                # raises TypeError, so only byte strings are decoded.
                if not isinstance(filtered_result, unicode):
                    filtered_result = unicode(filtered_result, 'utf-8')
                message.target_text = filtered_result

                keyvalue = message.packet_data.add()
                keyvalue.key = 'RAW_RESULT'
                keyvalue.value = result

            if trees:
                keyvalue = message.packet_data.add()
                keyvalue.key = 'TREES'
                keyvalue.value = trees

            # Overwrite in place and drop any stale bytes of a previously
            # longer serialization.
            handle.seek(0)
            handle.write(message.SerializeToString())
            handle.truncate()
Пример #9
0
    def handle_translation(self, request_id):
        """
        Translation handler that obtains a translation via the Google
        translation web front end.

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message, posts the source text to the web front
        end and extracts the translation from the <span> elements of the
        result box.  The message is only written back when a result box was
        found in the returned page.
        """
        # The context manager closes the message file even if the HTTP
        # request or the decoding of the response raises.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            source = self.language_code(message.source_language)
            target = self.language_code(message.target_language)

            the_url = 'http://translate.google.com/translate_t'
            the_data = urllib.urlencode({'js': 'n', 'sl': source,
              'tl': target, 'text': message.source_text.encode('utf-8')})
            the_header = {'User-agent': 'Mozilla/5.0'}

            opener = urllib2.build_opener(urllib2.HTTPHandler)
            http_request = urllib2.Request(the_url, the_data, the_header)
            http_handle = opener.open(http_request)
            try:
                content = http_handle.read()
            finally:
                # Release the connection even if reading fails half-way.
                http_handle.close()

            result_exp = re.compile(
              '<span id=result_box class="long_text">(.*)</span></div>',
              re.I|re.U|re.S)
            result = result_exp.search(content)

            if result:
                # Normalize HTML line breaks to \n.
                result = result.group(1).replace('<br>', '\n')

                # Extract all <span>...</span> tags containing the
                # translation.
                span_exp = re.compile('<span.*?>([^<]+?)</span>',
                  re.I|re.U|re.S)
                spans = [unicode(match.group(1), 'utf-8')
                  for match in span_exp.finditer(result)]

                # Construct target text from list of spans, normalizing
                # \n+ to \n.
                target_text = u'\n'.join([span.strip() for span in spans])
                multibreaks = re.compile('\n+', re.I|re.U|re.S)
                target_text = multibreaks.sub(u'\n', target_text)

                message.target_text = target_text

                # Overwrite in place and drop any stale bytes of a
                # previously longer serialization.
                handle.seek(0)
                handle.write(message.SerializeToString())
                handle.truncate()
Пример #10
0
    def handle_translation(self, request_id):
        """
        Translates text from German->English using Microsoft Translator.

        Requires a Bing AppID as documented at MSDN:
        - http://msdn.microsoft.com/en-us/library/ff512421.aspx

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message, sends the whole source text in a single
        request and, if a translation can be extracted from the response,
        writes the message back with target_text set.  The message file is
        left untouched otherwise.
        """
        # The context manager closes the message file even if the HTTP
        # request or the decoding of the response raises.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            source = self.language_code(message.source_language)
            target = self.language_code(message.target_language)

            # NOTE(review): the AppID is a credential hard-coded in source;
            # consider moving it into configuration.
            app_id = '9259D297CB9F67680C259FD62734B07C0D528312'
            the_data = urllib.urlencode({'appId': app_id, 'from': source,
              'to': target, 'text': message.source_text.encode('utf-8')})
            the_url = 'http://api.microsofttranslator.com/v2/Http.svc/' \
              'Translate?{0}'.format(the_data)
            the_header = {'User-agent': 'Mozilla/5.0'}

            opener = urllib2.build_opener(urllib2.HTTPHandler)
            http_request = urllib2.Request(the_url, None, the_header)
            http_handle = opener.open(http_request)
            try:
                content = http_handle.read()
            finally:
                # Release the connection even if reading fails half-way.
                http_handle.close()

            # The translation is the text content of the <string> element.
            result_exp = re.compile('<string xmlns="http://schemas.' \
              'microsoft.com/2003/10/Serialization/">(.*?)</string>',
              re.I|re.U)
            result = result_exp.search(content)

            if result:
                message.target_text = unicode(result.group(1), 'utf-8')

                # Overwrite in place and drop any stale bytes of a
                # previously longer serialization.
                handle.seek(0)
                handle.write(message.SerializeToString())
                handle.truncate()
Пример #11
0
    def handle_translation(self, request_id):
        """
        Translates text using the connected Moses SMT server system.

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message, sends every line of the source text to
        the XML-RPC server at self.MOSES_HOST:self.MOSES_PORT and writes
        the message back with target_text set to the joined translations.
        """
        # The context manager closes the message file even if one of the
        # XML-RPC calls raises.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            proxy = xmlrpclib.ServerProxy('{0}:{1}'.format(self.MOSES_HOST,
              self.MOSES_PORT))

            # One request per source line; a response lacking a 'text'
            # entry contributes a bare line break instead.
            result = []
            for text in message.source_text.split(u'\n'):
                content = proxy.translate({'text': text})
                result.append(content.get('text', '\n'))

            if result:
                message.target_text = u'\n'.join(result)

            # Overwrite in place and drop any stale bytes of a previously
            # longer serialization.
            handle.seek(0)
            handle.write(message.SerializeToString())
            handle.truncate()
Пример #12
0
    def handle_translation(self, request_id):
        """
        Handler connecting to the Yahoo! Babel Fish service.

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message, translates the source text in batches of
        self.__batch__ entries via self._batch_translate() and writes the
        message back with target_text set.  Each full batch is followed by
        a 30 second pause.
        """
        # The context manager closes the message file even if one of the
        # translation calls raises.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            source = self.language_code(message.source_language)
            target = self.language_code(message.target_language)

            # Insert splitter tokens to allow re-construction of original
            # lines.
            _source_text = []
            for source_line in message.source_text.split('\n'):
                _source_text.append(source_line.strip())
                _source_text.append(self.__splitter__)

            result = u''

            # Floor division keeps the batch count an integer regardless
            # of the division semantics in effect.
            batches = len(_source_text) // self.__batch__
            for batch in range(batches):
                _start = batch * self.__batch__
                _end = _start + self.__batch__
                text = u'\n'.join(_source_text[_start:_end])
                result += self._batch_translate(source, target, text)
                result += '\n'
                sleep(30)

            # Whatever does not fill a whole batch is sent last, without a
            # trailing pause.
            last_batch = len(_source_text) % self.__batch__
            if last_batch:
                text = u'\n'.join(_source_text[-last_batch:])
                result += self._batch_translate(source, target, text)
                result += '\n'

            message.target_text = result

            # Overwrite in place and drop any stale bytes of a previously
            # longer serialization.
            handle.seek(0)
            handle.write(message.SerializeToString())
            handle.truncate()
Пример #13
0
    def handle_translation(self, request_id):
        """
        Translation handler that obtains a translation via the Google
        translation web front end.

        Reads the serialized TranslationRequestMessage from
        /tmp/<request_id>.message, posts the source text to the web front
        end and extracts the translation from the "suggestion" textarea of
        the returned page.  The message is only written back when that
        textarea was found.
        """
        # The context manager closes the message file even if the HTTP
        # request or the decoding of the response raises.
        with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
            message = TranslationRequestMessage()
            message.ParseFromString(handle.read())

            source = self.language_code(message.source_language)
            target = self.language_code(message.target_language)

            the_url = 'http://translate.google.com/translate_t'
            the_data = urllib.urlencode({'js': 'n', 'sl': source,
              'tl': target, 'text': message.source_text.encode('utf-8')})
            the_header = {'User-agent': 'Mozilla/5.0'}

            opener = urllib2.build_opener(urllib2.HTTPHandler)
            http_request = urllib2.Request(the_url, the_data, the_header)
            http_handle = opener.open(http_request)
            try:
                content = http_handle.read()
            finally:
                # Release the connection even if reading fails half-way.
                http_handle.close()

            result_exp = re.compile('<textarea name=utrans wrap=SOFT ' \
              'dir="ltr" id=suggestion.*>(.*?)</textarea>', re.I|re.U)
            result = result_exp.search(content)

            if result:
                # Line breaks arrive as HTML-escaped <br> tags.
                target_html = result.group(1)
                target_text = target_html.replace('&lt;br&gt;', '\n')
                message.target_text = unicode(target_text, 'utf-8')

                # Overwrite in place and drop any stale bytes of a
                # previously longer serialization.
                handle.seek(0)
                handle.write(message.SerializeToString())
                handle.truncate()