def convert_server(self, document, filename=''):
    """Extract plain text from a document through the Tika JAXRS server.

    The document is sent with an HTTP PUT to the ``tika`` endpoint built
    from ``self.config.host`` and ``self.config.port``, requesting a
    ``text/plain`` response.

    :param document: a string (wrapped in ``StringIO`` before sending) or
        any other object, which is handed to ``put`` unchanged.
    :param filename: name used in the log message and passed on to
        ``clean_extracted_plaintext``.
    :returns: the value returned by ``clean_extracted_plaintext`` for the
        response body.
    :raises TikaConversionError: if the response status is not 200; the
        stripped response body is attached as ``stack_trace``.
    """
    server_root = "http://{0}:{1}".format(
        self.config.host, self.config.port)
    endpoint = '/'.join((server_root, 'tika'))
    self.log.info(
        'Converting document with tika JAXRS server: %s' % filename)

    # Plain strings are wrapped in a file-like object before upload.
    if isinstance(document, basestring):
        payload = StringIO(document)
    else:
        payload = document

    request_headers = {'Accept': 'text/plain'}
    request_timeout = self.config.timeout
    response = get_requests_session().put(
        endpoint,
        data=payload,
        headers=request_headers,
        timeout=request_timeout)

    status_code = response.status_code
    content = response.content

    # Happy path first: anything other than 200 is a conversion failure.
    if status_code == 200:
        return clean_extracted_plaintext(content, filename)

    message = ("Conversion with Tika JAXRS server failed "
               "with status %s. " % status_code)
    raise TikaConversionError(
        message, status_code=status_code, stack_trace=content.strip())
def convert_server(self, document, filename=''):
    """Extract plain text from a document through the Tika JAXRS server.

    The document is sent with an HTTP PUT to the ``tika`` endpoint built
    from ``self.config.host`` and ``self.config.port``, requesting a
    ``text/plain`` response.

    :param document: a string (wrapped in ``StringIO`` before sending) or
        any other object, which is handed to ``put`` unchanged.
    :param filename: name used in the log message and passed on to
        ``clean_extracted_plaintext``.
    :returns: the value returned by ``clean_extracted_plaintext`` for the
        response body.
    :raises TikaConversionError: if the response status is not 200; the
        stripped response body is attached as ``stack_trace``.
    """
    server_root = "http://{0}:{1}".format(
        self.config.host, self.config.port)
    endpoint = '/'.join((server_root, 'tika'))
    self.log.info(
        'Converting document with tika JAXRS server: %s' % filename)

    # Plain strings are wrapped in a file-like object before upload.
    if isinstance(document, basestring):
        payload = StringIO(document)
    else:
        payload = document

    request_headers = {'Accept': 'text/plain'}
    request_timeout = self.config.timeout
    response = get_requests_session().put(
        endpoint,
        data=payload,
        headers=request_headers,
        timeout=request_timeout)

    status_code = response.status_code
    content = response.content

    # Happy path first: anything other than 200 is a conversion failure.
    if status_code == 200:
        return clean_extracted_plaintext(content, filename)

    message = ("Conversion with Tika JAXRS server failed "
               "with status %s. " % status_code)
    raise TikaConversionError(
        message, status_code=status_code, stack_trace=content.strip())
def convert_local(self, document, filename=''):
    """Convert a document to plain text by running the local Tika jar.

    The document stream is copied into a temporary file, which is passed
    to ``<java_path> -jar <jar_path> -t <temp file>``. The process's
    stdout is then run through ``clean_extracted_plaintext``. The
    temporary file is always removed before returning or raising.

    :param document: source stream handed to ``copy_stream``.
    :param filename: name used in the log message and passed on to
        ``clean_extracted_plaintext``.
    :returns: the value returned by ``clean_extracted_plaintext`` for the
        process's stdout.
    :raises TikaConversionError: if ``run_process`` raises
        ``ProcessError``; the error's ``message`` is attached as
        ``stack_trace``.
    """
    import os

    self.log.info('Converting document with LOCAL tika: %s' % filename)

    # delete=False keeps the file on disk after close() so the external
    # java process can open it by name; we therefore remove it ourselves.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        try:
            copy_stream(document, temp_file)
        finally:
            # Close even if the copy fails so the handle is not leaked.
            temp_file.close()

        # NOTE(review): the command is a space-joined string, so a java or
        # jar path containing spaces would be split -- confirm how
        # run_process tokenizes its argument.
        cmd = ' '.join(
            [self.java_path, '-jar', self.jar_path, '-t', temp_file.name])
        try:
            stdout, stderr = run_process(cmd)
        except ProcessError as e:
            msg = "Conversion with local Tika failed."
            raise TikaConversionError(msg, stack_trace=e.message)

        return clean_extracted_plaintext(stdout, filename)
    finally:
        # Runs on success and on every failure path above.
        os.unlink(temp_file.name)
def convert_local(self, document, filename=''): self.log.info('Converting document with LOCAL tika: %s' % filename) temp_file = tempfile.NamedTemporaryFile(delete=False) copy_stream(document, temp_file) temp_file.close() try: cmd = ' '.join( [self.java_path, '-jar', self.jar_path, '-t', temp_file.name]) try: stdout, stderr = run_process(cmd) except ProcessError, e: msg = "Conversion with local Tika failed." stack_trace = e.message raise TikaConversionError(msg, stack_trace=stack_trace) text = clean_extracted_plaintext(stdout, filename) return text