示例#1
0
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Populate ``wb`` from an already opened workbook archive.

    :param wb: workbook object that receives the properties, theme, style
        properties, worksheets and named ranges read from ``archive``
    :param archive: opened archive exposing ``namelist()`` and ``read()``
    :param filename: forwarded to ``read_worksheet`` when ``use_iterators``
        is true
    :param use_iterators: when true each worksheet is read from the source
        returned by ``unpack_worksheet`` rather than from the raw part bytes
    :type use_iterators: bool
    :param keep_vba: when true ``archive`` is attached to ``wb.vba_archive``
        and the flag is forwarded to ``read_worksheet``
    :type keep_vba: bool

    :raises KeyError: if a mandatory part (workbook, styles, content types)
        cannot be read from ``archive``
    """

    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach the archive to the
    # workbook so that it is available for the save.
    if keep_vba:
        wb.vba_archive = archive

    # get workbook-level information
    # Only the core-properties part is treated as optional.  The workbook
    # settings are read outside the ``try`` so that a missing core part no
    # longer causes them to be silently skipped.
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb.read_workbook_settings(archive.read(ARC_WORKBOOK))

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    try:
        wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    content_types = read_content_types(archive.read(ARC_CONTENT_TYPES))
    sheet_types = [(sheet, contyp) for sheet, contyp in content_types if contyp in WORK_OR_CHART_TYPE]
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    # Pair titles with content types positionally and keep only worksheets.
    worksheet_names = [worksheet for worksheet, sheet_type in zip(sheet_names, sheet_types) if sheet_type[1] == VALID_WORKSHEET]
    for i, sheet_name in enumerate(worksheet_names):

        # NOTE(review): assumes the i-th worksheet is stored as
        # sheet<i+1>.xml -- confirm against archives written by other tools.
        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, style_properties['color_index'], keep_vba=keep_vba)
        else:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, style_properties['color_index'], filename, sheet_codename)
        wb.add_sheet(new_ws, index=i)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
示例#2
0
def test_read_content_types(datadir, DummyArchive):
    """Check the (part name, content type) pairs read from content_types.xml."""
    from openpyxl.reader.workbook import read_content_types

    expected = [
        ('/xl/workbook.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml'),
        ('/xl/worksheets/sheet1.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'),
        ('/xl/chartsheets/sheet1.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml'),
        ('/xl/worksheets/sheet2.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'),
        ('/xl/theme/theme1.xml', 'application/vnd.openxmlformats-officedocument.theme+xml'),
        ('/xl/styles.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml'),
        ('/xl/sharedStrings.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml'),
        ('/xl/drawings/drawing1.xml', 'application/vnd.openxmlformats-officedocument.drawing+xml'),
        ('/xl/charts/chart1.xml', 'application/vnd.openxmlformats-officedocument.drawingml.chart+xml'),
        ('/xl/drawings/drawing2.xml', 'application/vnd.openxmlformats-officedocument.drawing+xml'),
        ('/xl/charts/chart2.xml', 'application/vnd.openxmlformats-officedocument.drawingml.chart+xml'),
        ('/xl/calcChain.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml'),
        ('/docProps/core.xml', 'application/vnd.openxmlformats-package.core-properties+xml'),
        ('/docProps/app.xml', 'application/vnd.openxmlformats-officedocument.extended-properties+xml'),
    ]

    # Store the fixture file in the dummy archive under the content-types name.
    datadir.chdir()
    archive = DummyArchive
    with open("content_types.xml") as handle:
        xml_text = handle.read()
        archive.writestr(ARC_CONTENT_TYPES, xml_text)

    found = list(read_content_types(archive))
    assert found == expected
示例#3
0
def test_read_content_types():
    """Check the content types read from ``contains_chartsheets.xlsx``.

    The expected value is the ordered list of (part name, content type)
    pairs yielded by ``read_content_types`` for that archive.
    """
    from openpyxl.reader.workbook import read_content_types
    fname = os.path.join(DATADIR, "reader", "contains_chartsheets.xlsx")
    # Use the archive as a context manager so the file handle is released
    # even when the assertion fails.
    with zipfile.ZipFile(fname) as archive:
        assert list(read_content_types(archive)) == [
        ('/xl/workbook.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml'),
        ('/xl/worksheets/sheet1.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'),
        ('/xl/chartsheets/sheet1.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml'),
        ('/xl/worksheets/sheet2.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'),
        ('/xl/theme/theme1.xml', 'application/vnd.openxmlformats-officedocument.theme+xml'),
        ('/xl/styles.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml'),
        ('/xl/sharedStrings.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml'),
        ('/xl/drawings/drawing1.xml', 'application/vnd.openxmlformats-officedocument.drawing+xml'),
        ('/xl/charts/chart1.xml', 'application/vnd.openxmlformats-officedocument.drawingml.chart+xml'),
        ('/xl/drawings/drawing2.xml', 'application/vnd.openxmlformats-officedocument.drawing+xml'),
        ('/xl/charts/chart2.xml', 'application/vnd.openxmlformats-officedocument.drawingml.chart+xml'),
        ('/xl/calcChain.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml'),
        ('/docProps/core.xml', 'application/vnd.openxmlformats-package.core-properties+xml'),
        ('/docProps/app.xml', 'application/vnd.openxmlformats-officedocument.extended-properties+xml')
        ]
示例#4
0
def test_read_content_types(datadir):
    """Check the content types read from ``contains_chartsheets.xlsx``.

    The expected value is the ordered list of (part name, content type)
    pairs yielded by ``read_content_types`` for that archive.
    """
    from openpyxl.reader.workbook import read_content_types
    datadir.join("reader").chdir()
    # Use the archive as a context manager so the file handle is released
    # even when the assertion fails.
    with zipfile.ZipFile("contains_chartsheets.xlsx") as archive:
        assert list(read_content_types(archive)) == [
        ('/xl/workbook.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml'),
        ('/xl/worksheets/sheet1.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'),
        ('/xl/chartsheets/sheet1.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml'),
        ('/xl/worksheets/sheet2.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'),
        ('/xl/theme/theme1.xml', 'application/vnd.openxmlformats-officedocument.theme+xml'),
        ('/xl/styles.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml'),
        ('/xl/sharedStrings.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml'),
        ('/xl/drawings/drawing1.xml', 'application/vnd.openxmlformats-officedocument.drawing+xml'),
        ('/xl/charts/chart1.xml', 'application/vnd.openxmlformats-officedocument.drawingml.chart+xml'),
        ('/xl/drawings/drawing2.xml', 'application/vnd.openxmlformats-officedocument.drawing+xml'),
        ('/xl/charts/chart2.xml', 'application/vnd.openxmlformats-officedocument.drawingml.chart+xml'),
        ('/xl/calcChain.xml', 'application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml'),
        ('/docProps/core.xml', 'application/vnd.openxmlformats-package.core-properties+xml'),
        ('/docProps/app.xml', 'application/vnd.openxmlformats-officedocument.extended-properties+xml')
        ]
示例#5
0
def test_read_content_types(datadir, DummyArchive):
    """Check the (content type, part name) pairs read from content_types.xml."""
    from openpyxl.reader.workbook import read_content_types

    expected = [
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml",
            "/xl/workbook.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
            "/xl/worksheets/sheet1.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml",
            "/xl/chartsheets/sheet1.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
            "/xl/worksheets/sheet2.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.theme+xml",
            "/xl/theme/theme1.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml",
            "/xl/styles.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml",
            "/xl/sharedStrings.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.drawing+xml",
            "/xl/drawings/drawing1.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.drawingml.chart+xml",
            "/xl/charts/chart1.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.drawing+xml",
            "/xl/drawings/drawing2.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.drawingml.chart+xml",
            "/xl/charts/chart2.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml",
            "/xl/calcChain.xml",
        ),
        (
            "application/vnd.openxmlformats-package.core-properties+xml",
            "/docProps/core.xml",
        ),
        (
            "application/vnd.openxmlformats-officedocument.extended-properties+xml",
            "/docProps/app.xml",
        ),
    ]

    # Store the fixture file in the dummy archive under the content-types name.
    datadir.chdir()
    archive = DummyArchive
    with open("content_types.xml") as handle:
        xml_text = handle.read()
        archive.writestr(ARC_CONTENT_TYPES, xml_text)

    found = list(read_content_types(archive))
    assert found == expected
示例#6
0
文件: excel.py 项目: token97/op_patch
def load_workbook(filename, read_only=False, use_iterators=False, keep_vba=False, guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    # The archive is closed in ``finally`` so that a failure part-way through
    # loading does not leave the underlying file open.
    try:
        read_only = read_only or use_iterators

        wb = Workbook(guess_types=guess_types, data_only=data_only, read_only=read_only)

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        valid_files = archive.namelist()

        # If we are going to preserve the vba then attach a copy of the
        # archive to the workbook so that it is available for the save.
        if keep_vba:
            try:
                # ``filename`` is a path: read the raw bytes and make sure
                # the handle is closed even if the read fails.
                with open(filename, 'rb') as f:
                    s = f.read()
            except Exception:
                # ``filename`` could not be opened as a path, so treat it as
                # a file-like object and restore its position afterwards.
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                pos = filename.tell()
                filename.seek(0)
                s = filename.read()
                filename.seek(pos)
            wb.vba_archive = ZipFile(BytesIO(s), 'r')

        if read_only:
            # Separate handle kept on the workbook; ``archive`` itself is
            # closed before returning.
            wb._archive = ZipFile(filename)

        # get workbook-level information
        try:
            wb.properties = read_properties(archive.read(ARC_CORE))
        except KeyError:
            wb.properties = DocumentProperties()
        wb.active = read_workbook_settings(archive.read(ARC_WORKBOOK)) or 0

        # what content types do we have?
        cts = dict(read_content_types(archive))

        strings_path = cts.get(SHARED_STRINGS)
        if strings_path is not None:
            # Archive member names carry no leading slash.
            if strings_path.startswith("/"):
                strings_path = strings_path[1:]
            shared_strings = read_string_table(archive.read(strings_path))
        else:
            shared_strings = []

        wb.is_template = XLTX in cts or XLTM in cts

        try:
            wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
        except KeyError:
            assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

        parsed_styles = read_style_table(archive)
        if parsed_styles is not None:
            wb._differential_styles = parsed_styles.differential_styles
            wb._cell_styles = parsed_styles.cell_styles
            wb._named_styles = parsed_styles.named_styles
            wb._colors = parsed_styles.color_index
            wb._borders = parsed_styles.border_list
            wb._fonts = parsed_styles.font_list
            wb._fills = parsed_styles.fill_list
            wb._number_formats = parsed_styles.number_formats
            wb._protections = parsed_styles.protections
            wb._alignments = parsed_styles.alignments

        wb.excel_base_date = read_excel_base_date(archive)

        # get worksheets
        wb._sheets = []  # remove preset worksheet
        for sheet in detect_worksheets(archive):
            sheet_name = sheet['title']
            worksheet_path = sheet['path']
            if worksheet_path not in valid_files:
                continue

            if read_only:
                new_ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                           shared_strings)
                wb._add_sheet(new_ws)
            else:
                fh = archive.open(worksheet_path)
                parser = WorkSheetParser(wb, sheet_name, fh, shared_strings)
                parser.parse()
                # The sheet is looked up by title once parsing is done.
                new_ws = wb[sheet_name]
            new_ws.sheet_state = sheet['state']

            if wb.vba_archive is not None and new_ws.legacy_drawing is not None:
                # We need to get the file name of the legacy drawing
                dirname, basename = worksheet_path.rsplit('/', 1)
                rels_path = '/'.join((dirname, '_rels', basename + '.rels'))
                rels = get_dependents(archive, rels_path)
                new_ws.legacy_drawing = rels[new_ws.legacy_drawing].target

            if not read_only:
                # load comments into the worksheet cells
                comments_file = get_comments_file(worksheet_path, archive, valid_files)
                if comments_file is not None:
                    read_comments(new_ws, archive.read(comments_file))
                drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
                if drawings_file is not None:
                    read_drawings(new_ws, drawings_file, archive, valid_files)
        wb._differential_styles = []  # reset
        wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

        wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

        if EXTERNAL_LINK in cts:
            rels = read_rels(archive)
            wb._external_links = list(detect_external_links(rels, archive))

        return wb
    finally:
        archive.close()
示例#7
0
def check_content_type(workbook_type, archive):
    """Assert that ``workbook_type`` is a key of the archive's content types."""
    declared = dict(read_content_types(archive))
    assert workbook_type in declared
示例#8
0
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Populate ``wb`` from an already opened workbook archive.

    :param wb: workbook object that receives the properties, theme, style
        properties, worksheets, comments and named ranges read from
        ``archive``
    :param archive: opened archive exposing ``namelist()`` and ``read()``
    :param filename: path or file-like object; reopened as ``wb._archive``
        when ``use_iterators`` is true
    :param use_iterators: when true worksheets are created without reading
        their XML up front and comments are not loaded
    :type use_iterators: bool
    :param keep_vba: when true ``archive`` is attached to ``wb.vba_archive``
        and the flag is forwarded to ``read_worksheet``
    :type keep_vba: bool

    :raises KeyError: if a mandatory part (workbook, styles, content types)
        cannot be read from ``archive``
    """

    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach the archive to the
    # workbook so that it is available for the save.
    if keep_vba:
        wb.vba_archive = archive

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    # Only the core-properties part is treated as optional.  The workbook
    # settings are read outside the ``try`` so that a missing core part no
    # longer causes them to be silently skipped.
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb.read_workbook_settings(archive.read(ARC_WORKBOOK))

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}
    try:
        wb.loaded_theme = archive.read(
            ARC_THEME
        )  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    content_types = read_content_types(archive.read(ARC_CONTENT_TYPES))
    sheet_types = [(sheet, contyp) for sheet, contyp in content_types
                   if contyp in WORK_OR_CHART_TYPE]
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    # Pair titles with content types positionally and keep only worksheets.
    worksheet_names = [
        worksheet for worksheet, sheet_type in zip(sheet_names, sheet_types)
        if sheet_type[1] == VALID_WORKSHEET
    ]
    for i, sheet_name in enumerate(worksheet_names):

        # NOTE(review): assumes the i-th worksheet is stored as
        # sheet<i+1>.xml -- confirm against archives written by other tools.
        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)

        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(
                archive.read(worksheet_path),
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(
                None,
                wb,
                sheet_name,
                string_table,
                style_table,
                color_index=style_properties['color_index'],
                sheet_codename=sheet_codename)
        wb.add_sheet(new_ws, index=i)

        if not use_iterators:
            # load comments into the worksheet cells
            comments_file = get_comments_file(sheet_codename, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
示例#9
0
def _load_workbook(wb, archive, filename, read_only, keep_vba):
    """Populate ``wb`` from an already opened workbook archive.

    :param wb: workbook object that receives the properties, theme, styles,
        worksheets, comments, drawings, named ranges, code name and external
        links read from ``archive``
    :param archive: opened archive exposing ``namelist()`` and ``read()``
    :param filename: path or file-like object the archive was opened from;
        re-read for the VBA copy and reopened as ``wb._archive`` in
        read-only mode
    :param read_only: when true worksheets are created without reading their
        XML up front, and comments and drawings are not loaded
    :type read_only: bool
    :param keep_vba: when true an in-memory copy of the archive is attached
        to ``wb.vba_archive``
    :type keep_vba: bool
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        try:
            # ``filename`` is a path: read the raw bytes and make sure the
            # handle is closed even if the read fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except Exception:
            # ``filename`` could not be opened as a path, so treat it as a
            # file-like object and restore its position afterwards.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

    # what content types do we have?
    cts = dict(read_content_types(archive))

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        # Archive member names carry no leading slash.
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(
            ARC_THEME
        )  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(
        archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list': cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if worksheet_path not in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path),
                                    wb,
                                    sheet_name,
                                    shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None,
                                    wb,
                                    sheet_name,
                                    shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    worksheet_path=worksheet_path)

        # A missing or empty state falls back to 'visible'.
        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

            drawings_file = get_drawings_file(worksheet_path, archive,
                                              valid_files)
            if drawings_file is not None:
                read_drawings(new_ws, drawings_file, archive, valid_files)

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))
示例#10
0
文件: excel.py 项目: petres/eurostat
def _load_workbook(wb, archive, filename, read_only, keep_vba):
    """Populate ``wb`` from an already opened workbook archive.

    :param wb: workbook object that receives the properties, theme, styles,
        worksheets, comments, named ranges and external links read from
        ``archive``
    :param archive: opened archive exposing ``namelist()`` and ``read()``
    :param filename: path or file-like object the archive was opened from;
        re-read for the VBA copy and reopened as ``wb._archive`` in
        read-only mode
    :param read_only: when true worksheets are created without reading their
        XML up front and comments are not loaded
    :type read_only: bool
    :param keep_vba: when true an in-memory copy of the archive is attached
        to ``wb.vba_archive``
    :type keep_vba: bool
    """

    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        try:
            # ``filename`` is a path: read the raw bytes and make sure the
            # handle is closed even if the read fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except Exception:
            # ``filename`` could not be opened as a path, so treat it as a
            # file-like object and restore its position afterwards.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

    # what content types do we have?
    cts = dict(read_content_types(archive))

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        # Archive member names carry no leading slash.
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list':cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if worksheet_path not in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, shared_strings, style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    worksheet_path=worksheet_path)

        # A missing or empty state falls back to 'visible'.
        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive, valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))
示例#11
0
def check_content_type(workbook_type, archive):
    """Assert that ``workbook_type`` is a key of the archive's content types."""
    declared = dict(read_content_types(archive))
    assert workbook_type in declared