def load_workbook_img(
    filename,
    read_only=False,
    keep_vba=False,
    guess_types=False,
    data_only=False,
):
    archive = _validate_archive(filename)

    src = archive.read(ARC_CONTENT_TYPES)
    root = fromstring(src)
    package = Manifest.from_tree(root)

    wb = load_workbook(filename, read_only, keep_vba, guess_types, data_only)
    valid_files = archive.namelist()
    wb_sheet_names = wb.sheetnames
    wb_part = _find_workbook_part(package)
    parser = WorkbookParser(archive, wb_part.PartName[1:])
    parser.parse()
    for sheet, rel in parser.find_sheets():
        sheet_name = sheet.name
        ws = wb[sheet_name]
        worksheet_path = rel.target
        if not worksheet_path in valid_files:
            continue
        if not sheet_name in wb_sheet_names:
            continue
        drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
        if drawings_file is not None:
            ws = wb[sheet_name]
            read_drawings(ws, drawings_file, archive, valid_files)
    return wb
示例#2
0
def test_overwritten_default():
    from ..excel import _find_workbook_part

    src = """
    <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
      <Default Extension="xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
    </Types>
    """
    node = fromstring(src)
    package = Manifest.from_tree(node)

    assert _find_workbook_part(package) == Override("/xl/workbook.xml", XLSX)
示例#3
0
def test_find_standard_workbook_part(datadir, wb_type, wb_name):
    from ..excel import _find_workbook_part

    src = """
        <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
        <Override ContentType="{0}"
          PartName="{1}"/>
        </Types>
        """.format(wb_type, wb_name)
    node = fromstring(src)
    package = Manifest.from_tree(node)

    assert _find_workbook_part(package) == Override(wb_name, wb_type)
示例#4
0
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    read_only = read_only

    src = archive.read(ARC_CONTENT_TYPES)
    root = fromstring(src)
    package = Manifest.from_tree(root)

    wb_part = _find_workbook_part(package)
    parser = WorkbookParser(archive, wb_part.PartName[1:])
    wb = parser.wb
    wb._data_only = data_only
    wb._read_only = read_only
    wb._keep_links = keep_links
    wb.guess_types = guess_types
    wb.template = wb_part.ContentType in (XLTX, XLTM)
    parser.parse()
    wb._sheets = []

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
        for name in archive.namelist():
            wb.vba_archive.writestr(name, archive.read(name))


    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    if ARC_CORE in valid_files:
        src = fromstring(archive.read(ARC_CORE))
        wb.properties = DocumentProperties.from_tree(src)


    shared_strings = []
    ct = package.find(SHARED_STRINGS)
    if ct is not None:
        strings_path = ct.PartName[1:]
        shared_strings = read_string_table(archive.read(strings_path))


    if ARC_THEME in valid_files:
        wb.loaded_theme = archive.read(ARC_THEME)

    apply_stylesheet(archive, wb) # bind styles to workbook
    pivot_caches = parser.pivot_caches

    # get worksheets
    for sheet, rel in parser.find_sheets():
        sheet_name = sheet.name
        worksheet_path = rel.target
        rels_path = get_rels_path(worksheet_path)
        rels = []
        if rels_path in valid_files:
            rels = get_dependents(archive, rels_path)

        if not worksheet_path in valid_files:
            continue

        if read_only:
            ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                   shared_strings)

            wb._sheets.append(ws)
        else:
            fh = archive.open(worksheet_path)
            ws = wb.create_sheet(sheet_name)
            ws._rels = rels
            ws_parser = WorkSheetParser(ws, fh, shared_strings)
            ws_parser.parse()

            if rels:
                # assign any comments to cells
                for r in rels.find(COMMENTS_NS):
                    src = archive.read(r.target)
                    comment_sheet = CommentSheet.from_tree(fromstring(src))
                    for ref, comment in comment_sheet.comments:
                        ws[ref].comment = comment

                # preserve link to VML file if VBA
                if (
                    wb.vba_archive is not None
                    and ws.legacy_drawing is not None
                    ):
                    ws.legacy_drawing = rels[ws.legacy_drawing].target

                for t in ws_parser.tables:
                    src = archive.read(t)
                    xml = fromstring(src)
                    table = Table.from_tree(xml)
                    ws.add_table(table)

                pivot_rel = rels.find(TableDefinition.rel_type)
                for r in pivot_rel:
                    pivot_path = r.Target
                    src = archive.read(pivot_path)
                    tree = fromstring(src)
                    pivot = TableDefinition.from_tree(tree)
                    pivot.cache = pivot_caches[pivot.cacheId]
                    ws.add_pivot(pivot)

        ws.sheet_state = sheet.state
        ws._rels = [] # reset

    parser.assign_names()

    #wb._differential_styles.styles =  [] # tables may depened upon dxf

    archive.close()
    return wb
示例#5
0
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preseve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    read_only = read_only

    parser = WorkbookParser(archive)
    wb = parser.wb
    wb._data_only = data_only
    wb._read_only = read_only
    wb._keep_links = keep_links
    wb.guess_types = guess_types
    parser.parse()
    wb._sheets = []

    if read_only and guess_types:
        warnings.warn('Data types are not guessed when using iterator reader')

    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
        for name in archive.namelist():
            wb.vba_archive.writestr(name, archive.read(name))


    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    if ARC_CORE in valid_files:
        src = fromstring(archive.read(ARC_CORE))
        wb.properties = DocumentProperties.from_tree(src)

    # is workbook a template or note
    src = archive.read(ARC_CONTENT_TYPES)
    root = fromstring(src)
    package = Manifest.from_tree(root)
    wb.template = XLTX in package or XLTM in package

    shared_strings = []
    ct = package.find(SHARED_STRINGS)
    if ct is not None:
        strings_path = ct.PartName[1:]
        shared_strings = read_string_table(archive.read(strings_path))


    if ARC_THEME in valid_files:
        wb.loaded_theme = archive.read(ARC_THEME)

    apply_stylesheet(archive, wb) # bind styles to workbook

    # get worksheets
    for sheet, rel in parser.find_sheets():
        sheet_name = sheet.name
        worksheet_path = rel.target
        rels_path = get_rels_path(worksheet_path)
        rels = []
        if rels_path in valid_files:
            rels = get_dependents(archive, rels_path)

        if not worksheet_path in valid_files:
            continue

        if read_only:
            ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                   shared_strings)

            wb._sheets.append(ws)
        else:
            fh = archive.open(worksheet_path)
            ws = wb.create_sheet(sheet_name)
            ws._rels = rels
            ws_parser = WorkSheetParser(ws, fh, shared_strings)
            ws_parser.parse()

            if rels:
                # assign any comments to cells
                for r in rels.find(COMMENTS_NS):
                    src = archive.read(r.target)
                    comment_sheet = CommentSheet.from_tree(fromstring(src))
                    for ref, comment in comment_sheet.comments:
                        ws[ref].comment = comment

                # preserve link to VML file if VBA
                if (
                    wb.vba_archive is not None
                    and ws.legacy_drawing is not None
                    ):
                    ws.legacy_drawing = rels[ws.legacy_drawing].target

        ws.sheet_state = sheet.state
        ws._rels = [] # reset

    parser.assign_names()

    wb._differential_styles.styles =  []

    archive.close()
    return wb
示例#6
0
def check_content_type(workbook_type, archive):
    src = archive.read(ARC_CONTENT_TYPES)
    node = fromstring(src)
    package = Manifest.from_tree(node)
    assert workbook_type in package
示例#7
0
 def read_manifest(self):
     src = self.archive.read(ARC_CONTENT_TYPES)
     root = fromstring(src)
     self.package = Manifest.from_tree(root)