Exemplo n.º 1
0
def process_conversion(files=None, urls=None, pipeline_id=None, pipeline_type="pipelines", auto_pipeline_id=None, storage_type_name=docvert_storage.storage_type.memory_based, converter=converter_type.python_streaming_to_libreoffice):
    """Convert every supplied file and run it through the chosen pipeline.

    Arguments:
    files -- mapping of filename to document data; may be None when only
             urls are supplied.
    urls -- only checked for presence by the "files or urls" guard; this
            function does not otherwise read it.
    pipeline_id, pipeline_type, auto_pipeline_id -- forwarded unchanged to
            process_pipeline().
    storage_type_name -- forwarded to docvert_storage.get_storage().
    converter -- forwarded to generate_open_document() for input that is
            not already detected as an Open Document.

    Returns the storage object created by docvert_storage.get_storage().

    Raises docvert_exception.needs_files_or_urls when neither files nor urls
    is given, and docvert_exception.unrecognised_pipeline when pipeline_id
    is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" % pipeline_id)
    storage = docvert_storage.get_storage(storage_type_name)
    if files is None:
        # A urls-only call passes the guard above; without this check the
        # loop below would fail with AttributeError on None.iteritems().
        return storage
    for filename, data in files.iteritems():
        doc_type = document_type.detect_document_type(data)
        if doc_type != document_type.types.oasis_open_document:
            # Anything that is not already Open Document gets converted first.
            data = generate_open_document(data, converter)
        document_xml = opendocument.extract_useful_open_document_files(data, storage, filename)
        process_pipeline(document_xml, pipeline_id, pipeline_type, auto_pipeline_id, storage, filename)
    return storage
Exemplo n.º 2
0
def process_conversion(
        files=None,
        urls=None,
        pipeline_id=None,
        pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice):
    """Convert every supplied file and run it through the chosen pipeline.

    Arguments:
    files -- mapping of filename to document data; may be None when only
             urls are supplied.
    urls -- only checked for presence by the "files or urls" guard; this
            function does not otherwise read it.
    pipeline_id, pipeline_type, auto_pipeline_id -- forwarded unchanged to
            process_pipeline().
    storage_type_name -- forwarded to docvert_storage.get_storage().
    converter -- forwarded to generate_open_document() for input that is
            not already detected as an Open Document.

    Returns the storage object created by docvert_storage.get_storage().

    Raises docvert_exception.needs_files_or_urls when neither files nor urls
    is given, and docvert_exception.unrecognised_pipeline when pipeline_id
    is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" %
                                                      pipeline_id)
    storage = docvert_storage.get_storage(storage_type_name)
    if files is None:
        # A urls-only call passes the guard above; without this check the
        # loop below would fail with AttributeError on None.iteritems().
        return storage
    for filename, data in files.iteritems():
        doc_type = document_type.detect_document_type(data)
        if doc_type != document_type.types.oasis_open_document:
            # Anything that is not already Open Document gets converted first.
            data = generate_open_document(data, converter)
        document_xml = opendocument.extract_useful_open_document_files(
            data, storage, filename)
        process_pipeline(document_xml, pipeline_id, pipeline_type,
                         auto_pipeline_id, storage, filename)
    return storage
Exemplo n.º 3
0
        elif doc_type != document_type.types.oasis_open_document:
            try:
                data = generate_open_document(data, converter)
                doc_type = document_type.types.oasis_open_document
            except Exception, e:
                if not suppress_errors:
                    raise e
                storage.add("%s/index.txt" % filename, str(e))
        if doc_type == document_type.types.oasis_open_document:
            if pipeline_id == "open document": #reserved term, for when people want the Open Document file back directly. Don't bother loading pipeline.
                storage.add("%s/index.odt" % filename, data)
                thumbnail = opendocument.extract_thumbnail(data)
                if thumbnail:
                    storage.add("%s/thumbnail.png" % filename, thumbnail)
            else:
                document_xml = opendocument.extract_useful_open_document_files(data, storage, filename)
                storage.add("%s/opendocument.xml" % filename, document_xml)
                process_pipeline(document_xml, pipeline_id, pipeline_type, auto_pipeline_id, storage, filename)
                storage.remove("%s/opendocument.xml" % filename)
    return storage

def process_pipeline(initial_pipeline_value, pipeline_id, pipeline_type, auto_pipeline_id, storage, storage_prefix=None):
    """Look up a pipeline definition and run it on the given initial value.

    The definition is fetched with docvert_pipeline.get_pipeline_definition()
    and its 'stages' and 'pipeline_directory' entries are handed, together
    with storage and storage_prefix, to docvert_pipeline.pipeline_processor.

    Returns whatever the processor's start() call returns.
    """
    definition = docvert_pipeline.get_pipeline_definition(
        pipeline_type,
        pipeline_id,
        auto_pipeline_id,
    )
    stages = definition['stages']
    directory = definition['pipeline_directory']
    processor = docvert_pipeline.pipeline_processor(storage, stages, directory, storage_prefix)
    result = processor.start(initial_pipeline_value)
    return result

def generate_open_document(data, converter=converter_type.python_streaming_to_libreoffice):
    """Convert data to Open Document using the requested converter.

    Only converter_type.python_streaming_to_libreoffice is handled; it
    streams the data through the client from docvert_libreoffice.get_client().

    Raises docvert_exception.unrecognised_converter for any other converter.
    """
    uses_libreoffice_stream = converter == converter_type.python_streaming_to_libreoffice
    if not uses_libreoffice_stream:
        raise docvert_exception.unrecognised_converter("Unknown converter '%s'" % converter)
    client = docvert_libreoffice.get_client()
    return client.convert_by_stream(data, docvert_libreoffice.LIBREOFFICE_OPEN_DOCUMENT)