def _load_workbook(wb, archive, filename, use_iterators):
    """Fill *wb* with the properties, worksheets and named ranges in *archive*.

    :param wb: workbook object to populate; its ``worksheets`` list is reset
    :param archive: opened package exposing ``namelist()`` and ``read()``
    :param filename: forwarded to ``read_worksheet`` only when
        *use_iterators* is true
    :param use_iterators: when true each sheet goes through
        ``unpack_worksheet`` and is read with the extra *filename* and sheet
        code name arguments
    """
    members = archive.namelist()

    # Workbook-level information.
    wb.properties = read_properties_core(archive.read(ARC_CORE))

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        # Fall back to an empty table when the lookup raises KeyError.
        string_table = {}
    style_table = read_style_table(archive.read(ARC_STYLE))

    # Worksheets: start from an empty list so any preset sheet is dropped.
    wb.worksheets = []
    titles = read_sheets_titles(archive.read(ARC_WORKBOOK))

    for position, sheet_name in enumerate(titles, 1):
        # Sheet parts are looked up by position: sheet1.xml, sheet2.xml, ...
        sheet_codename = 'sheet%d.xml' % position
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)
        if worksheet_path not in members:
            continue

        if use_iterators:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table,
                                    style_table, filename, sheet_codename)
        else:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, string_table, style_table)
        wb.add_sheet(new_ws, index=position - 1)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def _load_workbook(wb, archive, filename, use_iterators):
    """Fill *wb* with the properties, worksheets and named ranges in *archive*.

    :param wb: workbook object to populate; its ``worksheets`` list is reset
    :param archive: opened package exposing ``namelist()`` and ``read()``
    :param filename: forwarded to ``read_worksheet`` only when
        *use_iterators* is true
    :param use_iterators: when true each sheet goes through
        ``unpack_worksheet`` and is read with the extra *filename* and sheet
        code name arguments
    """
    members = archive.namelist()

    # Workbook-level information.
    wb.properties = read_properties_core(archive.read(ARC_CORE))

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        # Fall back to an empty table when the lookup raises KeyError.
        string_table = {}
    style_table = read_style_table(archive.read(ARC_STYLE))

    # Worksheets: start from an empty list so any preset sheet is dropped.
    wb.worksheets = []
    titles = read_sheets_titles(archive.read(ARC_WORKBOOK))

    for position, sheet_name in enumerate(titles, 1):
        # Sheet parts are looked up by position: sheet1.xml, sheet2.xml, ...
        sheet_codename = 'sheet%d.xml' % position
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)
        if worksheet_path not in members:
            continue

        if use_iterators:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table,
                                    style_table, filename, sheet_codename)
        else:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, string_table, style_table)
        wb.add_sheet(new_ws, index=position - 1)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def test_read_named_ranges_missing_sheet(datadir):
    """Expect no named ranges when the only sheet is titled 'NOT My Sheeet'."""
    sheet = DummyWS('NOT My Sheeet')
    datadir.join("reader").chdir()

    with open("workbook.xml") as src:
        xml = src.read()

    found = list(read_named_ranges(xml, DummyWB(sheet)))
    assert found == []
def test_read_named_ranges(datadir):
    """The range read from reader/workbook.xml renders as 'My Sheeet!$D$8'."""
    sheet = DummyWS('My Sheeet')
    datadir.join("reader").chdir()

    with open("workbook.xml") as src:
        xml = src.read()

    rendered = [str(nr) for nr in read_named_ranges(xml, DummyWB(sheet))]
    assert ["My Sheeet!$D$8"] == rendered
def test_read_named_ranges(datadir):
    """The range read from workbook.xml renders as 'My Sheeet!$D$8'."""
    sheet = DummyWS('My Sheeet')
    datadir.chdir()

    with open("workbook.xml") as src:
        xml = src.read()

    rendered = [str(nr) for nr in read_named_ranges(xml, DummyWB(sheet))]
    assert ["My Sheeet!$D$8"] == rendered
def test_read_named_ranges_missing_sheet(datadir):
    """Expect no named ranges when the only sheet is titled 'NOT My Sheeet'."""
    sheet = DummyWS('NOT My Sheeet')
    datadir.chdir()

    with open("workbook.xml") as src:
        xml = src.read()

    found = list(read_named_ranges(xml, DummyWB(sheet)))
    assert found == []
def test_range_name_worksheet_special_chars(datadir):
    """A sheet title containing a comma and a quote yields one NamedRange
    with two destinations on that sheet."""
    ws = DummyWS('My Sheeet with a , and \'')
    datadir.chdir()

    with open('workbook_namedrange.xml') as src:
        xml = src.read()

    parsed = list(read_named_ranges(xml, DummyWB(ws)))
    assert len(parsed) == 1

    only_range = parsed[0]
    assert isinstance(only_range, NamedRange)

    wanted = [(ws, '$U$16:$U$24'), (ws, '$V$28:$V$36')]
    assert wanted == only_range.destinations
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Populate *wb* from the package parts stored in *archive*.

    :param wb: workbook to fill; its ``worksheets`` list is reset first
    :param archive: opened package exposing ``namelist()`` and ``read()``
    :param filename: forwarded to ``read_worksheet`` only when
        *use_iterators* is true
    :param use_iterators: when true each sheet goes through
        ``unpack_worksheet`` before being read
    :param keep_vba: when true *archive* itself is stored on
        ``wb.vba_archive`` and the flag is passed to ``read_worksheet``
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach the archive to the
    # workbook so that it is available for the save.
    if keep_vba:
        wb.vba_archive = archive

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        # NOTE(review): a KeyError from either call above replaces the
        # properties with defaults -- confirm that is intended for the
        # settings call as well.
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    try:
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        # some writers don't output a theme, live with it (fixes #160)
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    content_types = read_content_types(archive.read(ARC_CONTENT_TYPES))
    sheet_types = [(sheet, contyp) for sheet, contyp in content_types
                   if contyp in WORK_OR_CHART_TYPE]
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    # Pair titles with sheet content types positionally and keep only the
    # titles whose paired type is VALID_WORKSHEET.
    worksheet_names = [worksheet
                       for worksheet, sheet_type in zip(sheet_names, sheet_types)
                       if sheet_type[1] == VALID_WORKSHEET]

    for i, sheet_name in enumerate(worksheet_names):
        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)
        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, string_table, style_table,
                                    style_properties['color_index'],
                                    keep_vba=keep_vba)
        else:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table,
                                    style_table,
                                    style_properties['color_index'],
                                    filename, sheet_codename)
        wb.add_sheet(new_ws, index=i)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def test_read_external_ranges(datadir):
    """Check the names and values read from workbook_external_range.xml.

    Four entries are expected, compared pairwise and in order against
    ``expected`` as ``(name, value)`` tuples.
    """
    datadir.chdir()
    ws = DummyWS("Sheet1")
    wb = DummyWB(ws)

    with open("workbook_external_range.xml") as src:
        xml = src.read()

    named_ranges = list(read_named_ranges(xml, wb))
    assert len(named_ranges) == 4

    expected = [
        ("B1namedrange", "'Sheet1'!$A$1"),
        ("references_external_workbook", "[1]Sheet1!$A$1"),
        ("references_nr_in_ext_wb", "[1]!B2range"),
        ("references_other_named_range", "B1namedrange"),
    ]
    for xlr, target in zip(named_ranges, expected):
        # Compare as a tuple: ``assert a, b == c`` treats ``b == c`` as the
        # failure message and therefore only checks that ``a`` is truthy.
        assert (xlr.name, xlr.value) == target
def test_read_named_ranges():
    """The range read from reader/workbook.xml renders as 'My Sheeet!$D$8'."""

    class DummyWs(object):
        title = 'My Sheeet'

        def __str__(self):
            return self.title

    class DummyWB(object):
        # Every lookup yields a fresh stand-in sheet, whatever the name.
        def get_sheet_by_name(self, name):
            return DummyWs()

    xml_path = os.path.join(DATADIR, 'reader', 'workbook.xml')
    with open(xml_path) as handle:
        content = handle.read()

    rendered = [str(nr) for nr in read_named_ranges(content, DummyWB())]
    eq_(["My Sheeet!$D$8"], rendered)
def _load_workbook(wb, archive, filename, use_iterators):
    """Populate *wb* from the package parts stored in *archive*.

    :param wb: workbook to fill; its ``worksheets`` list is reset first
    :param archive: opened package exposing ``namelist()`` and ``read()``
    :param filename: forwarded to ``read_worksheet`` only when
        *use_iterators* is true
    :param use_iterators: when true each sheet goes through
        ``unpack_worksheet`` and is read with the extra *filename* and sheet
        code name arguments
    """
    valid_files = archive.namelist()

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    try:
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        # some writers don't output a theme, live with it (fixes #160)
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_table = read_style_table(archive.read(ARC_STYLE))

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    for i, sheet_name in enumerate(sheet_names):
        # Sheet parts are looked up by position: sheet1.xml, sheet2.xml, ...
        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)
        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, string_table, style_table)
        else:
            xml_source = unpack_worksheet(archive, worksheet_path)
            new_ws = read_worksheet(xml_source, wb, sheet_name, string_table,
                                    style_table, filename, sheet_codename)
        wb.add_sheet(new_ws, index=i)

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def test_range_name_worksheet_special_chars():
    """A sheet title containing a comma and a quote yields one NamedRange
    with two destinations on that sheet."""

    class DummyWs(object):
        title = 'My Sheeet with a , and \''

        def __str__(self):
            return self.title

    ws = DummyWs()

    class DummyWB(object):
        # Only the exact title resolves; any other name gives None.
        def get_sheet_by_name(self, name):
            if name == ws.title:
                return ws

    xml_path = os.path.join(DATADIR, 'reader', 'workbook_namedrange.xml')
    with open(xml_path) as handle:
        content = handle.read()

    named_ranges = read_named_ranges(content, DummyWB())
    eq_(1, len(named_ranges))

    only_range = named_ranges[0]
    ok_(isinstance(only_range, NamedRange))
    eq_([(ws, '$U$16:$U$24'), (ws, '$V$28:$V$36')], only_range.destinations)
try: # get workbook-level information wb.properties = read_properties_core(archive.read(ARC_CORE)) try: string_table = read_string_table(archive.read(ARC_SHARED_STRINGS)) except KeyError: string_table = {} style_table = read_style_table(archive.read(ARC_STYLE)) # get worksheets wb.worksheets = [] # remove preset worksheet sheet_names = read_sheets_titles(archive.read(ARC_APP)) for i, sheet_name in enumerate(sheet_names): sheet_codename = 'sheet%d.xml' % (i + 1) worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename) if not use_iterators: new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table) else: xml_source = unpack_worksheet(archive, worksheet_path) new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, filename, sheet_codename) #new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, filename, sheet_codename) wb.add_sheet(new_ws, index = i) wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb) except KeyError, e: raise InvalidFileException(unicode(e)) finally: archive.close() return wb
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Populate *wb* from the package parts stored in *archive*.

    :param wb: workbook to fill; its ``worksheets`` list is reset first
    :param archive: opened package exposing ``namelist()`` and ``read()``
    :param filename: path, or seekable file-like object, the package came
        from
    :param use_iterators: when true a second ``ZipFile`` is kept on
        ``wb._archive``, sheets are created without their XML being read
        here, and comments are not loaded
    :param keep_vba: when true an in-memory copy of the whole package is
        stored on ``wb.vba_archive``
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        try:
            # ``with`` closes the handle even if the read fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except Exception:
            # open() did not work, so treat *filename* as a file-like
            # object: read it from the start and restore its position.
            # Kept broad to preserve the existing fallback, but no longer
            # swallows KeyboardInterrupt/SystemExit.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        # NOTE(review): a KeyError from either call above replaces the
        # properties with defaults -- confirm that is intended for the
        # settings call as well.
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    try:
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        # some writers don't output a theme, live with it (fixes #160)
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = '%s/%s' % (PACKAGE_XL, sheet['path'])
        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(
                archive.read(worksheet_path), wb, sheet_name, string_table,
                style_table, color_index=style_properties['color_index'],
                keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(
                None, wb, sheet_name, string_table, style_table,
                color_index=style_properties['color_index'],
                worksheet_path=worksheet_path)
        wb.add_sheet(new_ws)

        if not use_iterators:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Populate *wb* from the package parts stored in *archive*.

    :param wb: workbook to fill; its ``worksheets`` list is reset first
    :param archive: opened package exposing ``namelist()`` and ``read()``
    :param filename: used to open a second ``ZipFile`` when *use_iterators*
        is true
    :param use_iterators: when true a second ``ZipFile`` is kept on
        ``wb._archive``, sheets are created without their XML being read
        here, and comments are not loaded
    :param keep_vba: when true *archive* itself is stored on
        ``wb.vba_archive`` and the flag is passed to ``read_worksheet``
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach the archive to the
    # workbook so that it is available for the save.
    if keep_vba:
        wb.vba_archive = archive

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        # NOTE(review): a KeyError from either call above replaces the
        # properties with defaults -- confirm that is intended for the
        # settings call as well.
        wb.properties = DocumentProperties()

    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    try:
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        # some writers don't output a theme, live with it (fixes #160)
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    content_types = read_content_types(archive.read(ARC_CONTENT_TYPES))
    sheet_types = [(sheet, contyp) for sheet, contyp in content_types
                   if contyp in WORK_OR_CHART_TYPE]
    sheet_names = read_sheets_titles(archive.read(ARC_WORKBOOK))
    # Pair titles with sheet content types positionally and keep only the
    # titles whose paired type is VALID_WORKSHEET.
    worksheet_names = [
        worksheet
        for worksheet, sheet_type in zip(sheet_names, sheet_types)
        if sheet_type[1] == VALID_WORKSHEET
    ]

    for i, sheet_name in enumerate(worksheet_names):
        sheet_codename = 'sheet%d.xml' % (i + 1)
        worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)
        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(
                archive.read(worksheet_path), wb, sheet_name, string_table,
                style_table, color_index=style_properties['color_index'],
                keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(
                None, wb, sheet_name, string_table, style_table,
                color_index=style_properties['color_index'],
                sheet_codename=sheet_codename)
        wb.add_sheet(new_ws, index=i)

        if not use_iterators:
            # load comments into the worksheet cells
            comments_file = get_comments_file(sheet_codename, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
try: string_table = read_string_table(archive.read(ARC_SHARED_STRINGS)) except KeyError: string_table = {} style_table = read_style_table(archive.read(ARC_STYLE)) # get worksheets wb.worksheets = [] # remove preset worksheet sheet_names = read_sheets_titles(archive.read(ARC_APP)) for i, sheet_name in enumerate(sheet_names): sheet_codename = 'sheet%d.xml' % (i + 1) worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename) if not use_iterators: new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table) else: xml_source = unpack_worksheet(archive, worksheet_path) new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, filename, sheet_codename) #new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, filename, sheet_codename) wb.add_sheet(new_ws, index=i) wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb) except KeyError, e: raise InvalidFileException(unicode(e)) finally: archive.close() return wb
def _load_workbook(wb, archive, filename, use_iterators, keep_vba):
    """Populate *wb* from the package parts stored in *archive*.

    :param wb: workbook to fill; its ``worksheets`` list is reset first
    :param archive: opened package exposing ``namelist()`` and ``read()``
    :param filename: path, or seekable file-like object, the package came
        from
    :param use_iterators: when true a second ``ZipFile`` is kept on
        ``wb._archive``, sheets are created without their XML being read
        here, and comments are not loaded
    :param keep_vba: when true an in-memory copy of the whole package is
        stored on ``wb.vba_archive``
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        try:
            # ``with`` closes the handle even if the read fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except Exception:
            # open() did not work, so treat *filename* as a file-like
            # object: read it from the start and restore its position.
            # Kept broad to preserve the existing fallback, but no longer
            # swallows KeyboardInterrupt/SystemExit.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if use_iterators:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
        wb.read_workbook_settings(archive.read(ARC_WORKBOOK))
    except KeyError:
        # NOTE(review): a KeyError from either call above replaces the
        # properties with defaults -- confirm that is intended for the
        # settings call as well.
        wb.properties = DocumentProperties()

    try:
        shared_strings = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        # some writers don't output a theme, live with it (fixes #160)
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    # The style reader returns a dict; 'table' and 'list' are pulled out and
    # the remainder is kept on the workbook.
    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')
    wb.shared_styles = style_properties.pop('list')
    wb.style_properties = style_properties

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if worksheet_path not in valid_files:
            continue

        if not use_iterators:
            new_ws = read_worksheet(
                archive.read(worksheet_path), wb, sheet_name, shared_strings,
                style_table, color_index=style_properties['color_index'],
                keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(
                None, wb, sheet_name, shared_strings, style_table,
                color_index=style_properties['color_index'],
                worksheet_path=worksheet_path)
        wb.add_sheet(new_ws)

        if not use_iterators:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))
def _load_workbook(wb, archive, filename, read_only, keep_vba):
    """Populate *wb* from the package parts stored in *archive*.

    :param wb: workbook to fill; its ``worksheets`` list is reset first
    :param archive: opened package exposing ``namelist()`` and ``read()``
    :param filename: path, or seekable file-like object, the package came
        from
    :param read_only: when true a second ``ZipFile`` is kept on
        ``wb._archive``, sheets are created without their XML being read
        here, and comments are not loaded
    :param keep_vba: when true an in-memory copy of the whole package is
        stored on ``wb.vba_archive``
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        try:
            # ``with`` closes the handle even if the read fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except Exception:
            # open() did not work, so treat *filename* as a file-like
            # object: read it from the start and restore its position.
            # Kept broad to preserve the existing fallback, but no longer
            # swallows KeyboardInterrupt/SystemExit.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

    # what content types do we have?
    cts = dict(read_content_types(archive))

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        # Strip a leading slash before using the value as an archive
        # member name.
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        # some writers don't output a theme, live with it (fixes #160)
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(
        archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list': cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if worksheet_path not in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, shared_strings, style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
                                    style_table, color_index=color_index,
                                    worksheet_path=worksheet_path)

        # A missing or empty state falls back to 'visible'.
        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))
def _load_workbook(wb, archive, filename, read_only, keep_vba):
    """Populate *wb* from the package parts stored in *archive*.

    :param wb: workbook to fill; its ``worksheets`` list is reset first
    :param archive: opened package exposing ``namelist()`` and ``read()``
    :param filename: path, or seekable file-like object, the package came
        from
    :param read_only: when true a second ``ZipFile`` is kept on
        ``wb._archive``, sheets are created without their XML being read
        here, and comments are not loaded
    :param keep_vba: when true an in-memory copy of the whole package is
        stored on ``wb.vba_archive``
    """
    valid_files = archive.namelist()

    # If we are going to preserve the vba then attach a copy of the archive
    # to the workbook so that it is available for the save.
    if keep_vba:
        try:
            # ``with`` closes the handle even if the read fails.
            with open(filename, 'rb') as f:
                s = f.read()
        except Exception:
            # open() did not work, so treat *filename* as a file-like
            # object: read it from the start and restore its position.
            # Kept broad to preserve the existing fallback, but no longer
            # swallows KeyboardInterrupt/SystemExit.
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

    # what content types do we have?
    cts = dict(read_content_types(archive))

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        # Strip a leading slash before using the value as an archive
        # member name.
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(ARC_THEME)
    except KeyError:
        # some writers don't output a theme, live with it (fixes #160)
        assert wb.loaded_theme is None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(
        archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list': cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(
        xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if worksheet_path not in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, shared_strings, style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
                                    style_table, color_index=color_index,
                                    worksheet_path=worksheet_path)

        # A missing or empty state falls back to 'visible'.
        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
            # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive,
                                              valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))