Exemplo n.º 1
0
    def convert_local(self, document, filename=''):
        """Extract plain text from a document by running a local Tika jar.

        The document is first written to a temporary file, then the jar at
        ``self.jar_path`` is invoked through ``self.java_path`` with that
        file's name as an argument.

        :param document: source handed to ``copy_stream`` as the thing to copy
            from (presumably a readable file-like object -- confirm against
            ``copy_stream``).
        :param filename: name used in the log message and passed on to
            ``clean_extracted_plaintext``; it is not used to read the document.
        :returns: the result of ``clean_extracted_plaintext(stdout, filename)``.
        :raises TikaConversionError: when ``run_process`` raises
            ``ProcessError``; the original error's ``message`` is attached as
            ``stack_trace``.
        """
        self.log.info('Converting document with LOCAL tika: %s' % filename)
        # delete=False keeps the file on disk after close(), so it can be
        # referenced by name in the command built below.
        temp_file = tempfile.NamedTemporaryFile(delete=False)
        copy_stream(document, temp_file)
        temp_file.close()

        # NOTE(review): the except/finally clause belonging to this outer try
        # is not visible in this excerpt; presumably it removes the temporary
        # file -- confirm.
        try:
            # NOTE(review): the command is a single space-joined string with no
            # quoting; if run_process hands it to a shell, paths containing
            # spaces would break -- confirm how run_process executes it.
            cmd = ' '.join([self.java_path, '-jar', self.jar_path,
                            '-t', temp_file.name])
            try:
                # stderr is captured but not used in the visible code.
                stdout, stderr = run_process(cmd)
            except ProcessError, e:
                # Translate the process-level failure into the conversion
                # error type, carrying the original message along.
                msg = "Conversion with local Tika failed."
                stack_trace = e.message
                raise TikaConversionError(msg, stack_trace=stack_trace)

            text = clean_extracted_plaintext(stdout, filename)
            return text
Exemplo n.º 2
0
    def convert_local(self, document, filename=''):
        """Convert a document to plain text using a locally installed Tika jar.

        Steps visible here: log the conversion, copy ``document`` into a
        named temporary file, run ``<java_path> -jar <jar_path> -t <tempfile>``
        via ``run_process``, and clean up the captured stdout.

        :param document: object passed to ``copy_stream`` as the copy source
            (assumed to be a readable stream -- verify against ``copy_stream``).
        :param filename: used for the log line and forwarded to
            ``clean_extracted_plaintext``.
        :returns: whatever ``clean_extracted_plaintext(stdout, filename)``
            returns.
        :raises TikaConversionError: if ``run_process`` raises
            ``ProcessError``; ``e.message`` is passed as ``stack_trace``.
        """
        self.log.info('Converting document with LOCAL tika: %s' % filename)
        # The temp file must outlive close() because its name is passed to
        # the external command, hence delete=False.
        temp_file = tempfile.NamedTemporaryFile(delete=False)
        copy_stream(document, temp_file)
        temp_file.close()

        # NOTE(review): this excerpt ends before the handler of this outer
        # try; cleanup of the temporary file is presumably done there --
        # confirm.
        try:
            # NOTE(review): arguments are joined with plain spaces and not
            # quoted -- check whether run_process uses a shell and whether the
            # configured paths can contain whitespace.
            cmd = ' '.join(
                [self.java_path, '-jar', self.jar_path, '-t', temp_file.name])
            try:
                # Only stdout is consumed below; stderr is unused here.
                stdout, stderr = run_process(cmd)
            except ProcessError, e:
                # Re-raise as the conversion-specific error, keeping the
                # underlying message for diagnostics.
                msg = "Conversion with local Tika failed."
                stack_trace = e.message
                raise TikaConversionError(msg, stack_trace=stack_trace)

            text = clean_extracted_plaintext(stdout, filename)
            return text
Exemplo n.º 3
0
 def test_run_process_returns_stdout_and_stderr(self):
     """run_process returns the command's stdout and stderr as a pair."""
     # Write a distinct line to each stream so the two can be told apart.
     cmd = "echo stdout; echo stderr 1>&2"
     stdout, stderr = run_process(cmd)
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(stdout, 'stdout\n')
     self.assertEqual(stderr, 'stderr\n')
Exemplo n.º 4
0
 def test_run_process_raises_process_error_on_nonzero_exit_code(self):
     """Running the ``false`` command must make run_process raise ProcessError."""
     failing_command = "false"
     with self.assertRaises(ProcessError):
         # The unpacked values are never inspected; the call is expected
         # to raise before returning.
         _out, _err = run_process(failing_command)
Exemplo n.º 5
0
 def test_run_process_returns_stdout_and_stderr(self):
     """Check that run_process hands back both output streams separately."""
     # One line goes to stdout, one is redirected to stderr.
     cmd = "echo stdout; echo stderr 1>&2"
     stdout, stderr = run_process(cmd)
     # Use assertEqual; the assertEquals spelling is a deprecated alias.
     self.assertEqual(stdout, 'stdout\n')
     self.assertEqual(stderr, 'stderr\n')
Exemplo n.º 6
0
 def test_run_process_raises_process_error_on_nonzero_exit_code(self):
     """run_process is expected to raise ProcessError for the ``false`` command."""
     command = "false"
     with self.assertRaises(ProcessError):
         # Results are unpacked only to mirror normal usage; they are unused.
         _stdout, _stderr = run_process(command)