Пример #1
0
def get_pipeline_xml(pipeline_type, pipeline_id, auto_pipeline_id):
    """Parse a pipeline definition, expanding it through an auto pipeline.

    The definition is read from
    ``<docvert_root>/pipelines/<pipeline_type>/<pipeline_id>/pipeline.xml``.
    When its root element is ``autopipeline``, the children of that root are
    serialised and substituted for the ``{{custom-stages}}`` placeholder of
    the auto pipeline template found under
    ``<docvert_root>/pipelines/auto_pipelines/<auto_pipeline_id>/`` (falling
    back to ``<auto_pipeline_id>.default/`` when the first cannot be opened).

    Raises:
        docvert_exception.unrecognised_pipeline: the pipeline file is missing.
        docvert_exception.unrecognised_auto_pipeline: the pipeline is an
            autopipeline but ``auto_pipeline_id`` is None.
        IOError: neither auto pipeline template could be opened.
    """
    path = os.path.join(docvert_root, "pipelines", pipeline_type, pipeline_id,
                        "pipeline.xml")
    if not os.path.exists(path):
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline_id '%s' (checked %s)" % (pipeline_id, path))
    autopipeline_path = None
    xml = lxml.etree.parse(path)
    if xml.getroot().tag == "autopipeline":
        if auto_pipeline_id is None:
            raise docvert_exception.unrecognised_auto_pipeline(
                "Unknown auto pipeline '%s'" % auto_pipeline_id)
        autopipeline_path = os.path.join(docvert_root, "pipelines",
                                         "auto_pipelines", auto_pipeline_id,
                                         "pipeline.xml")
        # NOTE(review): this re-tests `path`, which was already verified
        # above, so it can never fire. Testing `autopipeline_path` instead
        # would bypass the ".default" fallback below -- confirm the intent
        # before changing it.
        if not os.path.exists(path):
            raise docvert_exception.unrecognised_auto_pipeline(
                "Unknown auto pipeline '%s'" % auto_pipeline_id)
        custom_stages = "".join(map(lxml.etree.tostring, xml.getroot()))
        try:
            autopipeline_handle = open(autopipeline_path)
        except IOError:
            # Fall back to the "<id>.default" variant of the auto pipeline.
            autopipeline_path_with_default = os.path.join(
                docvert_root, "pipelines", "auto_pipelines",
                "%s.default" % auto_pipeline_id, "pipeline.xml")
            autopipeline_handle = open(autopipeline_path_with_default)
        # Always release the file handle, even if reading fails.
        try:
            autopipeline_template = autopipeline_handle.read()
        finally:
            autopipeline_handle.close()
        autopipeline = autopipeline_template.replace(
            '{{custom-stages}}', custom_stages)
        xml = lxml.etree.fromstring(autopipeline)
        xml = xml.getroottree()
    # NOTE(review): the visible excerpt ends here without returning `xml`;
    # the remainder of the function appears to be cut off.
Пример #2
0
def process_conversion(files=None, urls=None, pipeline_id=None, pipeline_type="pipelines", auto_pipeline_id=None, storage_type_name=docvert_storage.storage_type.memory_based, converter=converter_type.python_streaming_to_libreoffice):
    """Run every entry of ``files`` through the requested pipeline.

    Each value in ``files`` is passed to ``generate_open_document`` unless it
    is already detected as an OASIS OpenDocument, unpacked into the storage
    object and handed to ``process_pipeline``. ``urls`` is only consulted by
    the argument check; this version never reads it afterwards, and ``files``
    is iterated unconditionally.

    Returns:
        The storage object obtained from ``docvert_storage.get_storage``.

    Raises:
        docvert_exception.needs_files_or_urls: both ``files`` and ``urls``
            are None.
        docvert_exception.unrecognised_pipeline: ``pipeline_id`` is None.
    """
    nothing_supplied = files is None and urls is None
    if nothing_supplied:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" % pipeline_id)

    def _ensure_open_document(raw):
        # Anything not detected as OpenDocument goes through the converter.
        if document_type.detect_document_type(raw) != document_type.types.oasis_open_document:
            return generate_open_document(raw, converter)
        return raw

    results = docvert_storage.get_storage(storage_type_name)
    for name, raw in files.iteritems():
        open_document = _ensure_open_document(raw)
        extracted = opendocument.extract_useful_open_document_files(open_document, results, name)
        process_pipeline(extracted, pipeline_id, pipeline_type, auto_pipeline_id, results, name)
    return results
Пример #3
0
def get_pipeline_xml(namespaced_pipeline_id, auto_pipeline_id):
    """Parse a pipeline definition and describe where it was loaded from.

    The definition is read from
    ``<docvert_root>/pipelines/<namespaced_pipeline_id>/pipeline.xml``.

    Returns:
        A dict with keys ``xml`` (the parsed lxml tree),
        ``pipeline_directory`` (directory containing the file), ``path``
        (the file itself) and ``autopath`` (always None, because auto
        pipelines are rejected below).

    Raises:
        docvert_exception.unrecognised_pipeline: the pipeline file is missing.
        docvert_exception.unrecognised_auto_pipeline: the pipeline is an
            autopipeline but ``auto_pipeline_id`` is None.
        Exception: the pipeline is an autopipeline (not implemented yet).
    """
    path = os.path.join(docvert_root, "pipelines", namespaced_pipeline_id, "pipeline.xml")
    autopath = None
    if not os.path.exists(path):
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s' (checked %s)" % (namespaced_pipeline_id, path))
    xml = lxml.etree.parse(path)
    if xml.getroot().tag == "autopipeline":
        if auto_pipeline_id is None:
            raise docvert_exception.unrecognised_auto_pipeline("Unknown auto pipeline '%s'" % auto_pipeline_id)
        # TODO: resolve the auto pipeline once supported. The unreachable
        # draft that used to follow this raise looked under
        # <docvert_root>/pipelines/autopipeline/<auto_pipeline_id>/pipeline.xml
        # (and mistakenly re-checked `path` instead of that location).
        raise Exception("Sorry, auto pipelines aren't implemented yet.")
    return dict(xml=xml, pipeline_directory=os.path.dirname(path), path=path, autopath=autopath)
Пример #4
0
def process_conversion(files=None, urls=None, pipeline_id=None, pipeline_type="pipelines", auto_pipeline_id=None, storage_type_name=docvert_storage.storage_type.memory_based, converter=converter_type.python_streaming_to_libreoffice, suppress_errors=False):
    """Collect uploaded files and downloaded URLs for conversion.

    Every key of ``files`` is registered with the storage object under its
    own name. Every entry of ``urls`` is downloaded, converted with
    ``html_to_opendocument`` when detected as HTML, and added to ``files``
    (wrapped in a ``StringIO``) under a name that does not clash with an
    existing entry. A download that raises IOError is recorded in ``files``
    as an ``Exception`` instance instead of aborting the run.

    ``files`` is mutated in place when the caller supplies it. Either
    ``files`` or ``urls`` may be omitted, but not both.

    Raises:
        docvert_exception.needs_files_or_urls: both ``files`` and ``urls``
            are None.
        docvert_exception.unrecognised_pipeline: ``pipeline_id`` is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" % pipeline_id)
    # Only one of files/urls is required, so normalise the missing one;
    # otherwise the loops below fail on None.
    if files is None:
        files = {}
    if urls is None:
        urls = []
    storage = docvert_storage.get_storage(storage_type_name)

    def _title(name, files, data):
        """Derive a key for ``files`` from ``name`` that is safe to add."""
        filename = os.path.basename(name).replace('\\','-').replace('/','-').replace(':','-')
        if not filename:
            filename = "document.odt"
        if filename in files:
            # Identical content already stored under this name: reuse the key.
            if data and hasattr(files[filename], 'read') and files[filename].getvalue() == data:
                return filename
            # Otherwise number the name (before the first dot, if any)
            # until it no longer clashes.
            unique = 1
            potential_filename = filename
            while potential_filename in files:
                unique += 1
                if filename.count("."):
                    potential_filename = filename.replace(".", "%i." % unique, 1)
                else:
                    potential_filename = filename + str(unique)
            filename = potential_filename
        return filename

    for filename in files:
        storage.set_friendly_name(filename, filename)

    for url in urls:
        try:
            data = urllib2.urlopen(url, None, http_timeout).read()
            doc_type = document_type.detect_document_type(data)
            if doc_type == document_type.types.html:
                data = html_to_opendocument(data, url)
            filename = _title(url, files, data)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = StringIO.StringIO(data)
        except IOError as e:
            # Keep the failure next to the other inputs rather than aborting.
            filename = _title(url, files, None)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = Exception("Download error from %s: %s" % (url, e))
    # NOTE(review): the visible excerpt ends here; the conversion step and
    # the return value appear to be cut off. pipeline_type, auto_pipeline_id,
    # converter and suppress_errors are not used in the part shown.
def get_pipeline_xml(pipeline_type, pipeline_id, auto_pipeline_id):
    """Parse a pipeline definition, expanding it through an auto pipeline.

    Reads ``<docvert_root>/pipelines/<pipeline_type>/<pipeline_id>/pipeline.xml``.
    If the root element is ``autopipeline``, its serialised children replace
    the ``{{custom-stages}}`` placeholder in the auto pipeline template at
    ``<docvert_root>/pipelines/auto_pipelines/<auto_pipeline_id>/pipeline.xml``,
    or in the ``<auto_pipeline_id>.default`` variant when that file cannot be
    opened.

    Raises:
        docvert_exception.unrecognised_pipeline: the pipeline file is missing.
        docvert_exception.unrecognised_auto_pipeline: the pipeline is an
            autopipeline but ``auto_pipeline_id`` is None.
        IOError: neither auto pipeline template could be opened.
    """
    path = os.path.join(docvert_root, "pipelines", pipeline_type, pipeline_id, "pipeline.xml")
    if not os.path.exists(path):
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline_id '%s' (checked %s)" % (pipeline_id, path))
    autopipeline_path = None
    xml = lxml.etree.parse(path)
    if xml.getroot().tag == "autopipeline":
        if auto_pipeline_id is None:
            raise docvert_exception.unrecognised_auto_pipeline("Unknown auto pipeline '%s'" % auto_pipeline_id)
        autopipeline_path = os.path.join(docvert_root, "pipelines", "auto_pipelines", auto_pipeline_id, "pipeline.xml")
        # NOTE(review): `path` was already verified above, so this check can
        # never fire. Pointing it at `autopipeline_path` would skip the
        # ".default" fallback below -- confirm the intent before changing it.
        if not os.path.exists(path):
            raise docvert_exception.unrecognised_auto_pipeline("Unknown auto pipeline '%s'" % auto_pipeline_id)
        custom_stages = "".join(map(lxml.etree.tostring, xml.getroot()))
        try:
            autopipeline_handle = open(autopipeline_path)
        except IOError:
            # Fall back to the "<id>.default" variant of the auto pipeline.
            autopipeline_path_with_default = os.path.join(docvert_root, "pipelines", "auto_pipelines", "%s.default" % auto_pipeline_id, "pipeline.xml")
            autopipeline_handle = open(autopipeline_path_with_default)
        # Close the template file whether or not the read succeeds.
        try:
            autopipeline_template = autopipeline_handle.read()
        finally:
            autopipeline_handle.close()
        autopipeline = autopipeline_template.replace('{{custom-stages}}', custom_stages)
        xml = lxml.etree.fromstring(autopipeline)
        xml = xml.getroottree()
    # NOTE(review): the visible excerpt ends here without returning `xml`;
    # the remainder of the function appears to be cut off.
Пример #6
0
def process_conversion(
        files=None,
        urls=None,
        pipeline_id=None,
        pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice):
    """Convert each supplied file and feed it to the selected pipeline.

    For every ``(name, content)`` pair in ``files`` the content is turned
    into an OpenDocument via ``generate_open_document`` (unless it is already
    detected as one), unpacked into the storage object, and processed by
    ``process_pipeline``. ``urls`` takes part in the argument check only and
    is not otherwise read here; ``files`` is iterated unconditionally.

    Returns:
        The storage object created by ``docvert_storage.get_storage``.

    Raises:
        docvert_exception.needs_files_or_urls: ``files`` and ``urls`` are
            both None.
        docvert_exception.unrecognised_pipeline: ``pipeline_id`` is None.
    """
    if files is None:
        if urls is None:
            raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        message = "Unknown pipeline '%s'" % pipeline_id
        raise docvert_exception.unrecognised_pipeline(message)

    store = docvert_storage.get_storage(storage_type_name)
    for source_name, content in files.iteritems():
        detected = document_type.detect_document_type(content)
        already_open_document = not (
            detected != document_type.types.oasis_open_document)
        if not already_open_document:
            # Non-OpenDocument input is converted before extraction.
            content = generate_open_document(content, converter)
        extracted_xml = opendocument.extract_useful_open_document_files(
            content, store, source_name)
        process_pipeline(
            extracted_xml,
            pipeline_id,
            pipeline_type,
            auto_pipeline_id,
            store,
            source_name)
    return store