Exemplo n.º 1
0
 def find_command(self, name):
     """Resolve the path of the external program called ``name``.

     A configuration key is derived from ``name`` by appending ``_bin``,
     replacing dashes with underscores and upper-casing the result
     (``mdb-export`` becomes ``MDB_EXPORT_BIN``). That key is passed to
     ``self.manager.get_env`` together with the result of
     ``find_executable(name)``, which presumably serves as the default
     when the key is not configured (confirm against ``get_env``).

     Raises:
         SystemException: if the lookup yields ``None``.
     """
     env_key = ('%s_bin' % name).replace('-', '_').upper()
     lookup = self.manager.get_env
     resolved = lookup(env_key, find_executable(name))
     if resolved is not None:
         return resolved
     raise SystemException('Cannot find binary: %s' % name)
Exemplo n.º 2
0
 def dump_table(self, file_path, table_name, csv_path):
     """Export one table of a database file to ``csv_path``.

     Runs the ``mdb-export`` binary (resolved through
     ``self.find_command``) with ``-b strip``, the database path and the
     table name, redirecting its standard output into ``csv_path``.

     NOTE: the exit status of the subprocess is not inspected, so a
     failed export still returns ``csv_path`` (the file may be empty or
     partial).

     Returns:
         ``csv_path``, unchanged.

     Raises:
         SystemException: if ``find_command`` returns ``None``.
     """
     exporter = self.find_command('mdb-export')
     if exporter is None:
         raise SystemException('mdb-tools is not available')
     command = [exporter, '-b', 'strip'] + [file_path, table_name]
     with open(csv_path, 'w') as sink:
         # The child process writes straight into the open file handle.
         subprocess.call(command, stdout=sink)
     return csv_path
Exemplo n.º 3
0
def get_convert():
    """Return the document-to-PDF converter, creating it on first use.

    The instance is cached on ``settings._ingestors_convert``. When no
    cached instance exists, ``ServiceDocumentConverter`` is tried first and
    ``LocalDocumentConverter`` second; the first one whose
    ``is_available()`` is truthy gets instantiated and cached.

    Raises:
        SystemException: if neither converter reports being available.
    """
    if hasattr(settings, '_ingestors_convert'):
        return settings._ingestors_convert

    # Order matters: the service-backed converter wins over the local one.
    for candidate in (ServiceDocumentConverter, LocalDocumentConverter):
        if candidate.is_available():
            settings._ingestors_convert = candidate()
            return settings._ingestors_convert

    raise SystemException("Document conversion is not available")
Exemplo n.º 4
0
def get_ocr():
    """Return the OCR backend, creating it on first use.

    The instance is cached on ``settings._ingestors_ocr``. When no cached
    instance exists, the backends are tried in this order:
    ``GoogleOCRService``, ``ServiceOCRService``, ``LocalOCRService``. The
    first one whose ``is_available()`` is truthy gets instantiated and
    cached.

    Raises:
        SystemException: if no backend reports being available.
    """
    if hasattr(settings, '_ingestors_ocr'):
        return settings._ingestors_ocr

    # Order matters: earlier entries take precedence over later ones.
    backends = (GoogleOCRService, ServiceOCRService, LocalOCRService)
    for backend in backends:
        if backend.is_available():
            settings._ingestors_ocr = backend()
            return settings._ingestors_ocr

    raise SystemException("OCR is not available")
Exemplo n.º 5
0
 def get_tables(self, local_path):
     """List the table names that ``mdb-tables`` reports for a file.

     The binary is resolved through ``self.find_command`` and invoked with
     ``local_path`` as its only argument. Its output is split on single
     spaces; each piece is stripped of surrounding whitespace, empty
     pieces are dropped, and the rest are decoded as UTF-8.

     Returns:
         A list of table names as ``str``.

     Raises:
         SystemException: if ``find_command`` returns ``None``.
         ProcessingException: if the subprocess exits with a non-zero
             status (the underlying error is logged as a warning).
     """
     binary = self.find_command('mdb-tables')
     if binary is None:
         raise SystemException('mdb-tools is not available')

     try:
         raw = subprocess.check_output([binary, local_path])
     except subprocess.CalledProcessError as cpe:
         log.warning("Failed to open MDB: %s", cpe)
         raise ProcessingException("Failed to extract Access database.")

     names = []
     for piece in raw.split(b' '):
         token = piece.strip()
         if token:
             names.append(token.decode('utf-8'))
     return names
Exemplo n.º 6
0
    def exec_command(self, command, *args):
        """Run an external program and raise unless it exits successfully.

        The binary is resolved through ``self.find_command(command)`` and
        invoked with ``args`` appended verbatim. Its standard output is
        discarded, and the run is limited to ``self.COMMAND_TIMEOUT``
        (passed as the ``timeout`` of ``subprocess.call``).

        Returns:
            ``None`` when the program exits with status 0.

        Raises:
            SystemException: if ``find_command`` returns ``None``.
            ProcessingException: if the program cannot be started, runs
                past the timeout, or exits with a non-zero status.
        """
        binary = self.find_command(command)
        if binary is None:
            raise SystemException("Program not found: %s" % command)
        cmd = [binary]
        cmd.extend(args)
        try:
            # subprocess.DEVNULL discards the output without leaving an
            # open file object behind; a handle opened inline on
            # os.devnull would never be closed explicitly.
            code = subprocess.call(cmd,
                                   timeout=self.COMMAND_TIMEOUT,
                                   stdout=subprocess.DEVNULL)
        except (IOError, OSError) as ose:
            raise ProcessingException('Error: %s' % ose)
        except subprocess.TimeoutExpired:
            raise ProcessingException('Processing timed out.')

        if code != 0:
            raise ProcessingException('Failed: %s' % ' '.join(cmd))