def test_read_row(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """``_get_row`` returns one cell per requested column, ``None`` where absent."""
    datadir.join("reader").chdir()

    # One row holding values in columns D, K and AA only.
    src = (
        b' <sheetData xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" >'
        b' <row r="1" spans="4:27">'
        b' <c r="D1"> <v>1</v> </c>'
        b' <c r="K1"> <v>0.01</v> </c>'
        b' <c r="AA1"> <v>100</v> </c>'
        b' </row>'
        b' </sheetData> '
    )
    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "bug393-worksheet.xml", [])
    xml = fromstring(src)

    # Column 11 alone (K) yields just that value.
    only_k = [cell.value for cell in ws._get_row(xml, 11, 11)]
    assert only_k == [0.01]

    # Columns 1-11: the gaps around D and K come back as None.
    a_to_k = [cell.value for cell in ws._get_row(xml, 1, 11)]
    assert a_to_k == [None, None, None, 1, None, None, None, None, None, None, 0.01]
def test_force_dimension(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """``calculate_dimension(True)`` on ``sheet2_no_dimension.xml`` gives ``A1:AA30``."""
    datadir.join("reader").chdir()
    sheet = ReadOnlyWorksheet(
        DummyWorkbook, "Sheet", "", "sheet2_no_dimension.xml", []
    )
    assert sheet.calculate_dimension(True) == "A1:AA30"
def test_get_empty_cells_nonempty_row(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """Regression test for issue #908.

    Requesting a column slice that holds no cells, from a row that does
    contain a cell elsewhere, must yield only ``EMPTY_CELL`` placeholders.
    """
    datadir.join("reader").chdir()
    src = (
        b' <sheetData xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" >'
        b' <row r="1" spans="4:27">'
        b' <c r="A4"> <v>1</v> </c>'
        b' </row>'
        b' </sheetData> '
    )
    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "", [])
    cells = tuple(ws._get_row(fromstring(src), min_col=8, max_col=9))

    assert len(cells) == 2
    assert all(cell is EMPTY_CELL for cell in cells)
    assert [cell.value for cell in cells] == [None, None]
def test_read_without_coordinates(DummyWorkbook, ReadOnlyWorksheet):
    """Cells lacking an ``r`` attribute are still read from the row."""
    # Ten identical shared strings, so every index used below resolves to it.
    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "", ["Whatever"] * 10)
    src = (
        ' <row xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">'
        ' <c t="s"> <v>2</v> </c>'
        ' <c t="s"> <v>4</v> </c>'
        ' <c t="s"> <v>3</v> </c>'
        ' <c t="s"> <v>6</v> </c>'
        ' <c t="s"> <v>9</v> </c>'
        ' </row> '
    )
    cells = tuple(
        ws._get_row(fromstring(src), min_col=1, max_col=None, row_counter=1)
    )
    assert cells[0].value == "Whatever"
def test_get_max_cell(datadir, DummyWorkbook, ReadOnlyWorksheet, filename):
    """The last cell of the last row read from *filename* is ``AA30``."""
    datadir.join("reader").chdir()

    # Store the fixture in the workbook archive under the name the sheet uses.
    DummyWorkbook._archive.write(filename, "sheet1.xml")
    sheet = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "sheet1.xml", [])
    sheet._shared_strings = ['A', 'B']

    final_row = tuple(sheet.rows)[-1]
    assert final_row[-1].coordinate == "AA30"
def test_iter_rows_empty_rows(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """``iter_rows`` over ``empty_rows.xml`` yields seven rows with empty corners."""
    datadir.join("reader").chdir()
    sheet = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "empty_rows.xml", [])

    window = {"min_row": 1, "min_col": 1, "max_row": 10, "max_col": 10}
    grid = tuple(sheet.iter_rows(**window))

    assert len(grid) == 7
    top_left = grid[0][0]
    assert top_left.value is None
    diagonal_end = grid[6][6]
    assert diagonal_end.value is None
def test_force_dimension(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """``calculate_dimension(True)`` on the archived sheet gives ``A1:AA30``."""
    datadir.join("reader").chdir()

    # Store the fixture in the workbook archive under the name the sheet uses.
    DummyWorkbook._archive.write("sheet2_no_dimension.xml", "sheet1.xml")
    sheet = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "sheet1.xml", [])
    sheet._shared_strings = ['A', 'B']

    assert sheet.calculate_dimension(True) == "A1:AA30"
def test_read_empty_row(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """A childless ``<row>`` read with ``max_col=10`` produces ten cells."""
    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "", [])
    src = ' <row r="2" xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" /> '
    cells = tuple(ws._get_row(fromstring(src), max_col=10))
    assert len(cells) == 10
def test_read_cell_from_empty_row(DummyWorkbook, ReadOnlyWorksheet, row, column):
    """Looking up a cell in a sheet of childless rows returns ``EMPTY_CELL``."""
    payload = (
        b'<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">'
        b' <sheetData>'
        b' <row r="2" />'
        b' <row r="4" />'
        b' </sheetData>'
        b' </worksheet> '
    )
    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "", [])
    # A BytesIO built from initial bytes already starts at position 0.
    ws._xml = BytesIO(payload)
    assert ws._get_cell(row, column) is EMPTY_CELL
def test_read_hyperlinks_read_only(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """Cell ``F2`` of the archived ``bug393-worksheet.xml`` sheet has no value."""
    datadir.join("reader").chdir()

    # Store the fixture in the workbook archive under the name the sheet uses.
    DummyWorkbook._archive.write("bug393-worksheet.xml", "sheet1.xml")
    sheet = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "sheet1.xml", ['SOMETEXT'])

    assert sheet['F2'].value is None
def test_read_hyperlinks_read_only(datadir, Workbook, ReadOnlyWorksheet):
    """Cell ``F2`` of ``bug328_hyperlinks.xml`` has no value in read-only mode."""
    datadir.join("reader").chdir()
    workbook = Workbook(data_only=True, read_only=True)
    sheet = ReadOnlyWorksheet(
        workbook, "Sheet", "", 'bug328_hyperlinks.xml', ['SOMETEXT']
    )
    assert sheet['F2'].value is None
def test_ctor(datadir, DummyWorkbook, ReadOnlyWorksheet, filename, expected):
    """The constructor exposes the sheet bounds as min/max row and column.

    *filename* is the worksheet fixture to load and *expected* the
    ``(min_row, min_column, max_row, max_column)`` tuple it should report.
    """
    datadir.join("reader").chdir()

    # The worksheet is addressed by its archive member name, so the fixture
    # only has to be stored in the workbook archive. The original also opened
    # the file directly, but that handle was never used.
    DummyWorkbook._archive.write(filename, "sheet1.xml")
    ws = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "sheet1.xml", [])

    assert (ws.min_row, ws.min_column, ws.max_row, ws.max_column) == expected
def test_read_empty_rows(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """Reading the archived ``empty_rows.xml`` sheet yields seven rows."""
    datadir.join("reader").chdir()

    # Store the fixture in the workbook archive under the name the sheet uses.
    DummyWorkbook._archive.write("empty_rows.xml", "sheet1.xml")
    sheet = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "sheet1.xml", [])

    assert len(tuple(sheet.rows)) == 7
def test_read_with_missing_cells(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """Missing cells in the archived ``bug393-worksheet.xml`` read as ``None``."""
    datadir.join("reader").chdir()

    # Store the fixture in the workbook archive under the name the sheet uses.
    DummyWorkbook._archive.write("bug393-worksheet.xml", "sheet1.xml")
    sheet = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "sheet1.xml", [])
    all_rows = tuple(sheet.rows)

    second_row = [cell.value for cell in all_rows[1]]
    assert second_row == [None, None, 1, 2, 3]

    fourth_row = [cell.value for cell in all_rows[3]]
    assert fourth_row == [1, 2, None, None, 3]
def test_read_with_missing_cells(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """Missing cells in ``bug393-worksheet.xml`` read as ``None``."""
    datadir.join("reader").chdir()
    sheet = ReadOnlyWorksheet(
        DummyWorkbook, "Sheet", "", "bug393-worksheet.xml", []
    )
    all_rows = tuple(sheet.rows)

    second_row = [cell.value for cell in all_rows[1]]
    assert second_row == [None, None, 1, 2, 3]

    fourth_row = [cell.value for cell in all_rows[3]]
    assert fourth_row == [1, 2, None, None, 3]
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        With ``read_only=True`` every worksheet is a ``ReadOnlyWorksheet``
        that reads from a second archive handle stored on the workbook as
        ``_archive``.

    """
    archive = _validate_archive(filename)
    # Close the archive even when parsing fails part-way through, so a broken
    # file does not leave the handle open.
    try:
        src = archive.read(ARC_CONTENT_TYPES)
        root = fromstring(src)
        package = Manifest.from_tree(root)

        wb_part = _find_workbook_part(package)
        # PartName[1:] drops the first character of the part name
        # (presumably the leading "/" -- confirm against the manifest format).
        parser = WorkbookParser(archive, wb_part.PartName[1:])
        wb = parser.wb
        wb._data_only = data_only
        wb._read_only = read_only
        wb._keep_links = keep_links
        wb.guess_types = guess_types
        wb.template = wb_part.ContentType in (XLTX, XLTM)
        parser.parse()
        wb._sheets = []

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        valid_files = archive.namelist()

        # If we are going to preserve the vba then attach a copy of the archive
        # to the workbook so that it is available for the save.
        if keep_vba:
            wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
            for name in archive.namelist():
                wb.vba_archive.writestr(name, archive.read(name))

        if read_only:
            # separate handle: ``archive`` itself is closed before returning
            wb._archive = ZipFile(filename)

        # get workbook-level information
        if ARC_CORE in valid_files:
            src = fromstring(archive.read(ARC_CORE))
            wb.properties = DocumentProperties.from_tree(src)

        shared_strings = []
        ct = package.find(SHARED_STRINGS)
        if ct is not None:
            strings_path = ct.PartName[1:]
            shared_strings = read_string_table(archive.read(strings_path))

        if ARC_THEME in valid_files:
            wb.loaded_theme = archive.read(ARC_THEME)

        apply_stylesheet(archive, wb)  # bind styles to workbook
        pivot_caches = parser.pivot_caches

        # get worksheets
        for sheet, rel in parser.find_sheets():
            sheet_name = sheet.name
            worksheet_path = rel.target
            rels_path = get_rels_path(worksheet_path)
            rels = []
            if rels_path in valid_files:
                rels = get_dependents(archive, rels_path)

            if worksheet_path not in valid_files:
                continue

            if read_only:
                ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                       shared_strings)
                wb._sheets.append(ws)
            else:
                fh = archive.open(worksheet_path)
                ws = wb.create_sheet(sheet_name)
                ws._rels = rels
                ws_parser = WorkSheetParser(ws, fh, shared_strings)
                ws_parser.parse()

                # ``rels`` is a plain empty list when the sheet has no
                # relationships part, so everything that queries it is guarded.
                if rels:
                    # assign any comments to cells
                    for r in rels.find(COMMENTS_NS):
                        src = archive.read(r.target)
                        comment_sheet = CommentSheet.from_tree(fromstring(src))
                        for ref, comment in comment_sheet.comments:
                            ws[ref].comment = comment

                    # preserve link to VML file if VBA
                    if (
                        wb.vba_archive is not None
                        and ws.legacy_drawing is not None
                    ):
                        ws.legacy_drawing = rels[ws.legacy_drawing].target

                    for t in ws_parser.tables:
                        src = archive.read(t)
                        xml = fromstring(src)
                        table = Table.from_tree(xml)
                        ws.add_table(table)

                    pivot_rel = rels.find(TableDefinition.rel_type)
                    for r in pivot_rel:
                        pivot_path = r.Target
                        src = archive.read(pivot_path)
                        tree = fromstring(src)
                        pivot = TableDefinition.from_tree(tree)
                        pivot.cache = pivot_caches[pivot.cacheId]
                        ws.add_pivot(pivot)

            ws.sheet_state = sheet.state
            ws._rels = []  # reset

        parser.assign_names()

        # NOTE: wb._differential_styles.styles is deliberately not reset here;
        # tables may depend upon dxf.
    finally:
        archive.close()

    return wb
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        With ``read_only=True`` every worksheet is a ``ReadOnlyWorksheet``
        that reads from a second archive handle stored on the workbook as
        ``_archive``.

    """
    archive = _validate_archive(filename)
    # Close the archive even when parsing fails part-way through, so a broken
    # file does not leave the handle open.
    try:
        parser = WorkbookParser(archive)
        wb = parser.wb
        wb._data_only = data_only
        wb._read_only = read_only
        wb._keep_links = keep_links
        wb.guess_types = guess_types
        parser.parse()
        wb._sheets = []

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        valid_files = archive.namelist()

        # If we are going to preserve the vba then attach a copy of the archive
        # to the workbook so that it is available for the save.
        if keep_vba:
            wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
            for name in archive.namelist():
                wb.vba_archive.writestr(name, archive.read(name))

        if read_only:
            # separate handle: ``archive`` itself is closed before returning
            wb._archive = ZipFile(filename)

        # get workbook-level information
        if ARC_CORE in valid_files:
            src = fromstring(archive.read(ARC_CORE))
            wb.properties = DocumentProperties.from_tree(src)

        # is the workbook a template or not
        src = archive.read(ARC_CONTENT_TYPES)
        root = fromstring(src)
        package = Manifest.from_tree(root)
        wb.template = XLTX in package or XLTM in package

        shared_strings = []
        ct = package.find(SHARED_STRINGS)
        if ct is not None:
            # PartName[1:] drops the first character of the part name
            # (presumably the leading "/" -- confirm against the manifest format).
            strings_path = ct.PartName[1:]
            shared_strings = read_string_table(archive.read(strings_path))

        if ARC_THEME in valid_files:
            wb.loaded_theme = archive.read(ARC_THEME)

        apply_stylesheet(archive, wb)  # bind styles to workbook

        # get worksheets
        for sheet, rel in parser.find_sheets():
            sheet_name = sheet.name
            worksheet_path = rel.target
            rels_path = get_rels_path(worksheet_path)
            rels = []
            if rels_path in valid_files:
                rels = get_dependents(archive, rels_path)

            if worksheet_path not in valid_files:
                continue

            if read_only:
                ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                       shared_strings)
                wb._sheets.append(ws)
            else:
                fh = archive.open(worksheet_path)
                ws = wb.create_sheet(sheet_name)
                ws._rels = rels
                ws_parser = WorkSheetParser(ws, fh, shared_strings)
                ws_parser.parse()

                # ``rels`` is a plain empty list when the sheet has no
                # relationships part, so everything that queries it is guarded.
                if rels:
                    # assign any comments to cells
                    for r in rels.find(COMMENTS_NS):
                        src = archive.read(r.target)
                        comment_sheet = CommentSheet.from_tree(fromstring(src))
                        for ref, comment in comment_sheet.comments:
                            ws[ref].comment = comment

                    # preserve link to VML file if VBA
                    if (
                        wb.vba_archive is not None
                        and ws.legacy_drawing is not None
                    ):
                        ws.legacy_drawing = rels[ws.legacy_drawing].target

            ws.sheet_state = sheet.state
            ws._rels = []  # reset

        parser.assign_names()

        wb._differential_styles.styles = []
    finally:
        archive.close()

    return wb
def load_workbook(filename, read_only=False, use_iterators=False, keep_vba=False,
                  guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells; treated the same as ``read_only``
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        With ``read_only`` or ``use_iterators`` set, every worksheet is a
        ``ReadOnlyWorksheet`` that reads from a second archive handle stored
        on the workbook as ``_archive``.

    """
    archive = _validate_archive(filename)
    # Close the archive even when parsing fails part-way through, so a broken
    # file does not leave the handle open.
    try:
        read_only = read_only or use_iterators

        wb = Workbook(guess_types=guess_types, data_only=data_only, read_only=read_only)

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        valid_files = archive.namelist()

        # If we are going to preserve the vba then attach a copy of the archive
        # to the workbook so that it is available for the save.
        if keep_vba:
            try:
                with open(filename, 'rb') as f:
                    s = f.read()
            except TypeError:
                # ``filename`` is a file-like object rather than a path: read
                # it from the start and restore the caller's position.
                pos = filename.tell()
                filename.seek(0)
                s = filename.read()
                filename.seek(pos)
            wb.vba_archive = ZipFile(BytesIO(s), 'r')

        if read_only:
            # separate handle: ``archive`` itself is closed before returning
            wb._archive = ZipFile(filename)

        # get workbook-level information
        try:
            wb.properties = read_properties(archive.read(ARC_CORE))
        except KeyError:
            wb.properties = DocumentProperties()

        # The workbook part is needed several times below; read it once.
        workbook_xml = archive.read(ARC_WORKBOOK)
        wb.active = read_workbook_settings(workbook_xml) or 0

        # what content types do we have?
        cts = dict(read_content_types(archive))

        strings_path = cts.get(SHARED_STRINGS)
        if strings_path is not None:
            if strings_path.startswith("/"):
                strings_path = strings_path[1:]
            shared_strings = read_string_table(archive.read(strings_path))
        else:
            shared_strings = []

        wb.is_template = XLTX in cts or XLTM in cts

        try:
            wb.loaded_theme = archive.read(ARC_THEME)
        except KeyError:
            # some writers don't output a theme, live with it (fixes #160)
            assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

        parsed_styles = read_style_table(archive)
        if parsed_styles is not None:
            wb._differential_styles = parsed_styles.differential_styles
            wb._cell_styles = parsed_styles.cell_styles
            wb._named_styles = parsed_styles.named_styles
            wb._colors = parsed_styles.color_index
            wb._borders = parsed_styles.border_list
            wb._fonts = parsed_styles.font_list
            wb._fills = parsed_styles.fill_list
            wb._number_formats = parsed_styles.number_formats
            wb._protections = parsed_styles.protections
            wb._alignments = parsed_styles.alignments

        wb.excel_base_date = read_excel_base_date(archive)

        # get worksheets
        wb._sheets = []  # remove preset worksheet
        for sheet in detect_worksheets(archive):
            sheet_name = sheet['title']
            worksheet_path = sheet['path']
            if worksheet_path not in valid_files:
                continue

            if read_only:
                new_ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                           shared_strings)
                wb._add_sheet(new_ws)
            else:
                fh = archive.open(worksheet_path)
                parser = WorkSheetParser(wb, sheet_name, fh, shared_strings)
                parser.parse()
                new_ws = wb[sheet_name]
            new_ws.sheet_state = sheet['state']

            if wb.vba_archive is not None and new_ws.legacy_drawing is not None:
                # We need to get the file name of the legacy drawing
                dirname, basename = worksheet_path.rsplit('/', 1)
                rels_path = '/'.join((dirname, '_rels', basename + '.rels'))
                rels = get_dependents(archive, rels_path)
                new_ws.legacy_drawing = rels[new_ws.legacy_drawing].target

            if not read_only:
                # load comments into the worksheet cells
                comments_file = get_comments_file(worksheet_path, archive, valid_files)
                if comments_file is not None:
                    read_comments(new_ws, archive.read(comments_file))

                drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
                if drawings_file is not None:
                    read_drawings(new_ws, drawings_file, archive, valid_files)

        wb._differential_styles = []  # reset
        wb._named_ranges = list(read_named_ranges(workbook_xml, wb))
        wb.code_name = read_workbook_code_name(workbook_xml)

        if EXTERNAL_LINK in cts:
            rels = read_rels(archive)
            wb._external_links = list(detect_external_links(rels, archive))
    finally:
        archive.close()

    return wb
def test_get_max_cell(datadir, DummyWorkbook, ReadOnlyWorksheet, filename):
    """The last cell of the last row read from *filename* is ``AA30``."""
    datadir.join("reader").chdir()
    sheet = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", filename, [])
    final_row = tuple(sheet.rows)[-1]
    assert final_row[-1].coordinate == "AA30"
def test_ctor(datadir, DummyWorkbook, ReadOnlyWorksheet, filename, expected):
    """The constructor exposes the sheet bounds as min/max row and column.

    *filename* is the worksheet fixture to open and *expected* the
    ``(min_row, min_column, max_row, max_column)`` tuple it should report.
    """
    datadir.join("reader").chdir()
    with open(filename) as handle:
        sheet = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", handle, [])
        bounds = (
            sheet.min_row,
            sheet.min_column,
            sheet.max_row,
            sheet.max_column,
        )
        assert bounds == expected
def test_read_empty_rows(datadir, DummyWorkbook, ReadOnlyWorksheet):
    """Reading ``empty_rows.xml`` yields seven rows."""
    sheet = ReadOnlyWorksheet(DummyWorkbook, "Sheet", "", "empty_rows.xml", [])
    assert len(tuple(sheet.rows)) == 7
def read_worksheets(self):
    """Create a worksheet in ``self.wb`` for every sheet the parser reports.

    Sheets whose target is not among ``self.valid_files`` are skipped and
    chartsheets are handed to ``read_chartsheet``. In read-only mode a
    ``ReadOnlyWorksheet`` is appended to the workbook and nothing else is
    loaded. Otherwise the sheet is parsed, then its comments, legacy drawing
    link, tables, charts, images and pivot tables are attached.
    """
    for sheet, rel in self.parser.find_sheets():
        if rel.target not in self.valid_files:
            continue

        if "chartsheet" in rel.Type:
            self.read_chartsheet(sheet, rel)
            continue

        rels_path = get_rels_path(rel.target)
        rels = RelationshipList()
        if rels_path in self.valid_files:
            rels = get_dependents(self.archive, rels_path)

        if self.read_only:
            ws = ReadOnlyWorksheet(self.wb, sheet.name, rel.target, self.shared_strings)
            self.wb._sheets.append(ws)
            continue

        # Close the worksheet stream once the reader has bound everything;
        # the code below only uses ``ws_parser.tables`` and the archive.
        # NOTE(review): assumes bind_all() consumes the stream eagerly.
        with self.archive.open(rel.target) as fh:
            ws = self.wb.create_sheet(sheet.name)
            ws._rels = rels
            ws_parser = WorksheetReader(ws, fh, self.shared_strings, self.data_only)
            ws_parser.bind_all()

        # assign any comments to cells
        for r in rels.find(COMMENTS_NS):
            src = self.archive.read(r.target)
            comment_sheet = CommentSheet.from_tree(fromstring(src))
            for ref, comment in comment_sheet.comments:
                ws[ref].comment = comment

        # preserve link to VML file if VBA
        if self.wb.vba_archive and ws.legacy_drawing:
            ws.legacy_drawing = rels[ws.legacy_drawing].target

        for t in ws_parser.tables:
            src = self.archive.read(t)
            xml = fromstring(src)
            table = Table.from_tree(xml)
            ws.add_table(table)

        # Named ``drawing_rel`` so that it does not rebind the outer ``rel``.
        drawings = rels.find(SpreadsheetDrawing._rel_type)
        for drawing_rel in drawings:
            charts, images = find_images(self.archive, drawing_rel.target)
            for c in charts:
                ws.add_chart(c, c.anchor)
            for im in images:
                ws.add_image(im, im.anchor)

        pivot_rel = rels.find(TableDefinition.rel_type)
        for r in pivot_rel:
            pivot_path = r.Target
            src = self.archive.read(pivot_path)
            tree = fromstring(src)
            pivot = TableDefinition.from_tree(tree)
            pivot.cache = self.parser.pivot_caches[pivot.cacheId]
            ws.add_pivot(pivot)

        ws.sheet_state = sheet.state
def load_workbook(filename, read_only=False, use_iterators=False, keep_vba=KEEP_VBA,
                  guess_types=False, data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param use_iterators: use lazy load for cells; treated the same as ``read_only``
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    .. note::

        With ``read_only`` or ``use_iterators`` set, every worksheet is a
        ``ReadOnlyWorksheet`` that reads from a second archive handle stored
        on the workbook as ``_archive``.

    """
    archive = _validate_archive(filename)
    # Close the archive even when parsing fails part-way through, so a broken
    # file does not leave the handle open.
    try:
        read_only = read_only or use_iterators

        wb = Workbook(guess_types=guess_types, data_only=data_only, read_only=read_only)

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        valid_files = archive.namelist()

        # If we are going to preserve the vba then attach a copy of the archive
        # to the workbook so that it is available for the save.
        if keep_vba:
            try:
                with open(filename, 'rb') as f:
                    s = f.read()
            except TypeError:
                # ``filename`` is a file-like object rather than a path: read
                # it from the start and restore the caller's position.
                pos = filename.tell()
                filename.seek(0)
                s = filename.read()
                filename.seek(pos)
            wb.vba_archive = ZipFile(BytesIO(s), 'r')

        if read_only:
            # separate handle: ``archive`` itself is closed before returning
            wb._archive = ZipFile(filename)

        # get workbook-level information
        try:
            wb.properties = read_properties(archive.read(ARC_CORE))
        except KeyError:
            wb.properties = DocumentProperties()

        # The workbook part is needed several times below; read it once.
        workbook_xml = archive.read(ARC_WORKBOOK)
        wb.active = read_workbook_settings(workbook_xml) or 0

        # what content types do we have?
        cts = dict(read_content_types(archive))

        strings_path = cts.get(SHARED_STRINGS)
        if strings_path is not None:
            if strings_path.startswith("/"):
                strings_path = strings_path[1:]
            shared_strings = read_string_table(archive.read(strings_path))
        else:
            shared_strings = []

        wb.is_template = XLTX in cts or XLTM in cts

        try:
            wb.loaded_theme = archive.read(ARC_THEME)
        except KeyError:
            # some writers don't output a theme, live with it (fixes #160)
            assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

        parsed_styles = read_style_table(archive)
        if parsed_styles is not None:
            wb._differential_styles = parsed_styles.differential_styles
            wb._cell_styles = parsed_styles.cell_styles
            wb._named_styles = parsed_styles.named_styles
            wb._colors = parsed_styles.color_index
            wb._borders = parsed_styles.border_list
            wb._fonts = parsed_styles.font_list
            wb._fills = parsed_styles.fill_list
            wb._number_formats = parsed_styles.number_formats
            wb._protections = parsed_styles.protections
            wb._alignments = parsed_styles.alignments

        wb.excel_base_date = read_excel_base_date(archive)

        # get worksheets
        wb._sheets = []  # remove preset worksheet
        for sheet in detect_worksheets(archive):
            sheet_name = sheet['title']
            worksheet_path = sheet['path']
            if worksheet_path not in valid_files:
                continue

            if read_only:
                new_ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                           shared_strings)
                wb._add_sheet(new_ws)
            else:
                fh = archive.open(worksheet_path)
                parser = WorkSheetParser(wb, sheet_name, fh, shared_strings)
                parser.parse()
                new_ws = wb[sheet_name]
            new_ws.sheet_state = sheet['state']

            if wb.vba_archive is not None and new_ws.legacy_drawing is not None:
                # We need to get the file name of the legacy drawing
                dirname, basename = worksheet_path.rsplit('/', 1)
                rels_path = '/'.join((dirname, '_rels', basename + '.rels'))
                rels = get_dependents(archive, rels_path)
                new_ws.legacy_drawing = rels[new_ws.legacy_drawing].target

            if not read_only:
                # load comments into the worksheet cells
                comments_file = get_comments_file(worksheet_path, archive, valid_files)
                if comments_file is not None:
                    read_comments(new_ws, archive.read(comments_file))

        wb._differential_styles = []  # reset
        wb._named_ranges = list(read_named_ranges(workbook_xml, wb))
        wb.code_name = read_workbook_code_name(workbook_xml)

        if EXTERNAL_LINK in cts:
            rels = read_rels(archive)
            wb._external_links = list(detect_external_links(rels, archive))
    finally:
        archive.close()

    return wb