def make_name_access_maps(bk): name_and_scope_map = {} # (name.lower(), scope): Name_object name_map = {} # name.lower() : list of Name_objects (sorted in scope order) num_names = len(bk.name_obj_list) for namex in xrange(num_names): nobj = bk.name_obj_list[namex] name_lcase = nobj.name.lower() key = (name_lcase, nobj.scope) if name_and_scope_map.has_key(key): msg = 'Duplicate entry %r in name_and_scope_map' % (key, ) if 0: raise XLRDError(msg) else: if bk.verbosity: print >> bk.logfile, msg name_and_scope_map[key] = nobj if name_map.has_key(name_lcase): name_map[name_lcase].append((nobj.scope, nobj)) else: name_map[name_lcase] = [(nobj.scope, nobj)] for key in name_map.keys(): alist = name_map[key] alist.sort() name_map[key] = [x[1] for x in alist] bk.name_and_scope_map = name_and_scope_map bk.name_map = name_map
def handle_palette(book, data): if not book.formatting_info: return blah = DEBUG or book.verbosity >= 2 n_colours, = unpack('<H', data[:2]) expected_n_colours = (16, 56)[book.biff_version >= 50] if ((DEBUG or book.verbosity >= 1) and n_colours != expected_n_colours): fprintf(book.logfile, "NOTE *** Expected %d colours in PALETTE record, found %d\n", expected_n_colours, n_colours) elif blah: fprintf(book.logfile, "PALETTE record with %d colours\n", n_colours) fmt = '<xx%di' % n_colours # use i to avoid long integers expected_size = 4 * n_colours + 2 actual_size = len(data) tolerance = 4 if not expected_size <= actual_size <= expected_size + tolerance: raise XLRDError('PALETTE record: expected size %d, actual size %d' % (expected_size, actual_size)) colours = unpack(fmt, data[:expected_size]) assert book.palette_record == [] # There should be only 1 PALETTE record # a colour will be 0xbbggrr # IOW, red is at the little end for i in xrange(n_colours): c = colours[i] red = c & 0xff green = (c >> 8) & 0xff blue = (c >> 16) & 0xff old_rgb = book.colour_map[8 + i] new_rgb = (red, green, blue) book.palette_record.append(new_rgb) book.colour_map[8 + i] = new_rgb if blah: if new_rgb != old_rgb: print >> book.logfile, "%2d: %r -> %r" % (i, old_rgb, new_rgb)
def open_workbook( filename=None, logfile=sys.stdout, verbosity=0, pickleable=True, use_mmap=USE_MMAP, file_contents=None, encoding_override=None, formatting_info=False, on_demand=False, ragged_rows=False, ): peeksz = 4 if file_contents: peek = file_contents[:peeksz] else: f = open(filename, "rb") peek = f.read(peeksz) f.close() if peek == timemachine.BYTES_LITERAL("PK\x03\x04"): # a ZIP file if file_contents: zf = zipfile.ZipFile(timemachine.BYTES_IO(file_contents)) else: zf = zipfile.ZipFile(filename) component_names = zf.namelist() if verbosity: logfile.write('ZIP component_names:\n') pprint.pprint(component_names, logfile) if 'xl/workbook.xml' in component_names: import xlsx bk = xlsx.open_workbook_2007_xml( zf, component_names, logfile=logfile, verbosity=verbosity, pickleable=pickleable, use_mmap=mmap, formatting_info=formatting_info, on_demand=on_demand, ragged_rows=ragged_rows, ) return bk if 'xl/workbook.bin' in component_names: raise XLRDError('Excel 2007 xlsb file; not supported') if 'content.xml' in component_names: raise XLRDError('Openoffice.org ODS file; not supported') raise XLRDError('ZIP file contents not a known type of workbook') import book bk = book.open_workbook_xls( filename=filename, logfile=logfile, verbosity=verbosity, pickleable=pickleable, use_mmap=use_mmap, file_contents=file_contents, encoding_override=encoding_override, formatting_info=formatting_info, on_demand=on_demand, ragged_rows=ragged_rows, ) return bk
def handle_xf(self, data): ### self is a Book instance # DEBUG = 0 blah = DEBUG or self.verbosity >= 3 bv = self.biff_version xf = XF() xf.alignment = XFAlignment() xf.alignment.indent_level = 0 xf.alignment.shrink_to_fit = 0 xf.alignment.text_direction = 0 xf.border = XFBorder() xf.border.diag_up = 0 xf.border.diag_down = 0 xf.border.diag_colour_index = 0 xf.border.diag_line_style = 0 # no line xf.background = XFBackground() xf.protection = XFProtection() # fill in the known standard formats if bv >= 50 and not self.xfcount: # i.e. do this once before we process the first XF record for x in std_format_code_types.keys(): if not self.format_map.has_key(x): ty = std_format_code_types[x] fmt_str = std_format_strings[x] fmtobj = Format(x, ty, fmt_str) self.format_map[x] = fmtobj if bv >= 80: unpack_fmt = '<HHHBBBBIiH' ( xf.font_index, xf.format_key, pkd_type_par, pkd_align1, xf.alignment.rotation, pkd_align2, pkd_used, pkd_brdbkg1, pkd_brdbkg2, pkd_brdbkg3, ) = unpack(unpack_fmt, data[0:20]) upkbits(xf.protection, pkd_type_par, ( (0, 0x01, 'cell_locked'), (1, 0x02, 'formula_hidden'), )) upkbits( xf, pkd_type_par, ( (2, 0x0004, 'is_style'), # Following is not in OOo docs, but is mentioned # in Gnumeric source and also in (deep breath) # org.apache.poi.hssf.record.ExtendedFormatRecord.java (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known. (4, 0xFFF0, 'parent_style_index'), )) upkbits(xf.alignment, pkd_align1, ( (0, 0x07, 'hor_align'), (3, 0x08, 'text_wrapped'), (4, 0x70, 'vert_align'), )) upkbits(xf.alignment, pkd_align2, ( (0, 0x0f, 'indent_level'), (4, 0x10, 'shrink_to_fit'), (6, 0xC0, 'text_direction'), )) reg = pkd_used >> 2 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, reg & 1) reg >>= 1 upkbitsL(xf.border, pkd_brdbkg1, ( (0, 0x0000000f, 'left_line_style'), (4, 0x000000f0, 'right_line_style'), (8, 0x00000f00, 'top_line_style'), (12, 0x0000f000, 'bottom_line_style'), (16, 0x007f0000, 'left_colour_index'), (23, 0x3f800000, 'right_colour_index'), (30, 0x40000000, 'diag_down'), (31, 0x80000000L, 'diag_up'), )) upkbits(xf.border, pkd_brdbkg2, ( (0, 0x0000007F, 'top_colour_index'), (7, 0x00003F80, 'bottom_colour_index'), (14, 0x001FC000, 'diag_colour_index'), (21, 0x01E00000, 'diag_line_style'), )) upkbitsL(xf.background, pkd_brdbkg2, ((26, 0xFC000000L, 'fill_pattern'), )) upkbits(xf.background, pkd_brdbkg3, ( (0, 0x007F, 'pattern_colour_index'), (7, 0x3F80, 'background_colour_index'), )) elif bv >= 50: unpack_fmt = '<HHHBBIi' ( xf.font_index, xf.format_key, pkd_type_par, pkd_align1, pkd_orient_used, pkd_brdbkg1, pkd_brdbkg2, ) = unpack(unpack_fmt, data[0:16]) upkbits(xf.protection, pkd_type_par, ( (0, 0x01, 'cell_locked'), (1, 0x02, 'formula_hidden'), )) upkbits( xf, pkd_type_par, ( (2, 0x0004, 'is_style'), (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known. (4, 0xFFF0, 'parent_style_index'), )) upkbits(xf.alignment, pkd_align1, ( (0, 0x07, 'hor_align'), (3, 0x08, 'text_wrapped'), (4, 0x70, 'vert_align'), )) orientation = pkd_orient_used & 0x03 xf.alignment.rotation = [0, 255, 90, 180][orientation] reg = pkd_orient_used >> 2 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, reg & 1) reg >>= 1 upkbitsL(xf.background, pkd_brdbkg1, ( (0, 0x0000007F, 'pattern_colour_index'), (7, 0x00003F80, 'background_colour_index'), (16, 0x003F0000, 'fill_pattern'), )) upkbitsL(xf.border, pkd_brdbkg1, ( (22, 0x01C00000, 'bottom_line_style'), (25, 0xFE000000L, 'bottom_colour_index'), )) upkbits(xf.border, pkd_brdbkg2, ( (0, 0x00000007, 'top_line_style'), (3, 0x00000038, 'left_line_style'), (6, 0x000001C0, 'right_line_style'), (9, 0x0000FE00, 'top_colour_index'), (16, 0x007F0000, 'left_colour_index'), (23, 0x3F800000, 'right_colour_index'), )) elif bv >= 40: unpack_fmt = '<BBHBBHI' ( xf.font_index, xf.format_key, pkd_type_par, pkd_align_orient, pkd_used, pkd_bkg_34, pkd_brd_34, ) = unpack(unpack_fmt, data[0:12]) upkbits(xf.protection, pkd_type_par, ( (0, 0x01, 'cell_locked'), (1, 0x02, 'formula_hidden'), )) upkbits( xf, pkd_type_par, ( (2, 0x0004, 'is_style'), (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known. (4, 0xFFF0, 'parent_style_index'), )) upkbits(xf.alignment, pkd_align_orient, ( (0, 0x07, 'hor_align'), (3, 0x08, 'text_wrapped'), (4, 0x30, 'vert_align'), )) orientation = (pkd_align_orient & 0xC0) >> 6 xf.alignment.rotation = [0, 255, 90, 180][orientation] reg = pkd_used >> 2 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, reg & 1) reg >>= 1 upkbits(xf.background, pkd_bkg_34, ( (0, 0x003F, 'fill_pattern'), (6, 0x07C0, 'pattern_colour_index'), (11, 0xF800, 'background_colour_index'), )) upkbitsL(xf.border, pkd_brd_34, ( (0, 0x00000007, 'top_line_style'), (3, 0x000000F8, 'top_colour_index'), (8, 0x00000700, 'left_line_style'), (11, 0x0000F800, 'left_colour_index'), (16, 0x00070000, 'bottom_line_style'), (19, 0x00F80000, 'bottom_colour_index'), (24, 0x07000000, 'right_line_style'), (27, 0xF8000000L, 'right_colour_index'), )) elif bv == 30: unpack_fmt = '<BBBBHHI' ( xf.font_index, xf.format_key, pkd_type_prot, pkd_used, pkd_align_par, pkd_bkg_34, pkd_brd_34, ) = unpack(unpack_fmt, data[0:12]) upkbits(xf.protection, pkd_type_prot, ( (0, 0x01, 'cell_locked'), (1, 0x02, 'formula_hidden'), )) upkbits( xf, pkd_type_prot, ( (2, 0x0004, 'is_style'), (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known. )) upkbits(xf.alignment, pkd_align_par, ( (0, 0x07, 'hor_align'), (3, 0x08, 'text_wrapped'), )) upkbits(xf, pkd_align_par, ((4, 0xFFF0, 'parent_style_index'), )) reg = pkd_used >> 2 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, reg & 1) reg >>= 1 upkbits(xf.background, pkd_bkg_34, ( (0, 0x003F, 'fill_pattern'), (6, 0x07C0, 'pattern_colour_index'), (11, 0xF800, 'background_colour_index'), )) upkbitsL(xf.border, pkd_brd_34, ( (0, 0x00000007, 'top_line_style'), (3, 0x000000F8, 'top_colour_index'), (8, 0x00000700, 'left_line_style'), (11, 0x0000F800, 'left_colour_index'), (16, 0x00070000, 'bottom_line_style'), (19, 0x00F80000, 'bottom_colour_index'), (24, 0x07000000, 'right_line_style'), (27, 0xF8000000L, 'right_colour_index'), )) xf.alignment.vert_align = 2 # bottom xf.alignment.rotation = 0 else: raise XLRDError('programmer stuff-up: bv=%d' % bv) xf.xf_index = len(self.xf_list) self.xf_list.append(xf) self.xfcount += 1 if blah: xf.dump( self.logfile, header="--- handle_xf: xf[%d] ---" % xf.xf_index, footer=" ", ) # Now for some assertions ... if xf.is_style: assert xf.parent_style_index == 0x0FFF check_colour_indexes_in_obj(self, xf, xf.xf_index) if not self.format_map.has_key(xf.format_key): msg = "WARNING *** XF[%d] unknown (raw) format key (%d, 0x%04x)\n" fprintf(self.logfile, msg, xf.xf_index, xf.format_key, xf.format_key) xf.format_key = 0
def handle_xf(self, data): ### self is a Book instance # DEBUG = 0 blah = DEBUG or self.verbosity >= 3 bv = self.biff_version xf = XF() xf.alignment = XFAlignment() xf.alignment.indent_level = 0 xf.alignment.shrink_to_fit = 0 xf.alignment.text_direction = 0 xf.border = XFBorder() xf.border.diag_up = 0 xf.border.diag_down = 0 xf.border.diag_colour_index = 0 xf.border.diag_line_style = 0 # no line xf.background = XFBackground() xf.protection = XFProtection() # fill in the known standard formats if bv >= 50 and not self.xfcount: # i.e. do this once before we process the first XF record fill_in_standard_formats(self) if bv >= 80: unpack_fmt = '<HHHBBBBIiH' ( xf.font_index, xf.format_key, pkd_type_par, pkd_align1, xf.alignment.rotation, pkd_align2, pkd_used, pkd_brdbkg1, pkd_brdbkg2, pkd_brdbkg3, ) = unpack(unpack_fmt, data[0:20]) upkbits(xf.protection, pkd_type_par, ( (0, 0x01, 'cell_locked'), (1, 0x02, 'formula_hidden'), )) upkbits( xf, pkd_type_par, ( (2, 0x0004, 'is_style'), # Following is not in OOo docs, but is mentioned # in Gnumeric source and also in (deep breath) # org.apache.poi.hssf.record.ExtendedFormatRecord.java (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known. (4, 0xFFF0, 'parent_style_index'), )) upkbits(xf.alignment, pkd_align1, ( (0, 0x07, 'hor_align'), (3, 0x08, 'text_wrapped'), (4, 0x70, 'vert_align'), )) upkbits(xf.alignment, pkd_align2, ( (0, 0x0f, 'indent_level'), (4, 0x10, 'shrink_to_fit'), (6, 0xC0, 'text_direction'), )) reg = pkd_used >> 2 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, reg & 1) reg >>= 1 upkbitsL(xf.border, pkd_brdbkg1, ( (0, 0x0000000f, 'left_line_style'), (4, 0x000000f0, 'right_line_style'), (8, 0x00000f00, 'top_line_style'), (12, 0x0000f000, 'bottom_line_style'), (16, 0x007f0000, 'left_colour_index'), (23, 0x3f800000, 'right_colour_index'), (30, 0x40000000, 'diag_down'), (31, 0x80000000L, 'diag_up'), )) upkbits(xf.border, pkd_brdbkg2, ( (0, 0x0000007F, 'top_colour_index'), (7, 0x00003F80, 'bottom_colour_index'), (14, 0x001FC000, 'diag_colour_index'), (21, 0x01E00000, 'diag_line_style'), )) upkbitsL(xf.background, pkd_brdbkg2, ((26, 0xFC000000L, 'fill_pattern'), )) upkbits(xf.background, pkd_brdbkg3, ( (0, 0x007F, 'pattern_colour_index'), (7, 0x3F80, 'background_colour_index'), )) elif bv >= 50: unpack_fmt = '<HHHBBIi' ( xf.font_index, xf.format_key, pkd_type_par, pkd_align1, pkd_orient_used, pkd_brdbkg1, pkd_brdbkg2, ) = unpack(unpack_fmt, data[0:16]) upkbits(xf.protection, pkd_type_par, ( (0, 0x01, 'cell_locked'), (1, 0x02, 'formula_hidden'), )) upkbits( xf, pkd_type_par, ( (2, 0x0004, 'is_style'), (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known. (4, 0xFFF0, 'parent_style_index'), )) upkbits(xf.alignment, pkd_align1, ( (0, 0x07, 'hor_align'), (3, 0x08, 'text_wrapped'), (4, 0x70, 'vert_align'), )) orientation = pkd_orient_used & 0x03 xf.alignment.rotation = [0, 255, 90, 180][orientation] reg = pkd_orient_used >> 2 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, reg & 1) reg >>= 1 upkbitsL(xf.background, pkd_brdbkg1, ( (0, 0x0000007F, 'pattern_colour_index'), (7, 0x00003F80, 'background_colour_index'), (16, 0x003F0000, 'fill_pattern'), )) upkbitsL(xf.border, pkd_brdbkg1, ( (22, 0x01C00000, 'bottom_line_style'), (25, 0xFE000000L, 'bottom_colour_index'), )) upkbits(xf.border, pkd_brdbkg2, ( (0, 0x00000007, 'top_line_style'), (3, 0x00000038, 'left_line_style'), (6, 0x000001C0, 'right_line_style'), (9, 0x0000FE00, 'top_colour_index'), (16, 0x007F0000, 'left_colour_index'), (23, 0x3F800000, 'right_colour_index'), )) elif bv >= 40: unpack_fmt = '<BBHBBHI' ( xf.font_index, xf.format_key, pkd_type_par, pkd_align_orient, pkd_used, pkd_bkg_34, pkd_brd_34, ) = unpack(unpack_fmt, data[0:12]) upkbits(xf.protection, pkd_type_par, ( (0, 0x01, 'cell_locked'), (1, 0x02, 'formula_hidden'), )) upkbits( xf, pkd_type_par, ( (2, 0x0004, 'is_style'), (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known. (4, 0xFFF0, 'parent_style_index'), )) upkbits(xf.alignment, pkd_align_orient, ( (0, 0x07, 'hor_align'), (3, 0x08, 'text_wrapped'), (4, 0x30, 'vert_align'), )) orientation = (pkd_align_orient & 0xC0) >> 6 xf.alignment.rotation = [0, 255, 90, 180][orientation] reg = pkd_used >> 2 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, reg & 1) reg >>= 1 upkbits(xf.background, pkd_bkg_34, ( (0, 0x003F, 'fill_pattern'), (6, 0x07C0, 'pattern_colour_index'), (11, 0xF800, 'background_colour_index'), )) upkbitsL(xf.border, pkd_brd_34, ( (0, 0x00000007, 'top_line_style'), (3, 0x000000F8, 'top_colour_index'), (8, 0x00000700, 'left_line_style'), (11, 0x0000F800, 'left_colour_index'), (16, 0x00070000, 'bottom_line_style'), (19, 0x00F80000, 'bottom_colour_index'), (24, 0x07000000, 'right_line_style'), (27, 0xF8000000L, 'right_colour_index'), )) elif bv == 30: unpack_fmt = '<BBBBHHI' ( xf.font_index, xf.format_key, pkd_type_prot, pkd_used, pkd_align_par, pkd_bkg_34, pkd_brd_34, ) = unpack(unpack_fmt, data[0:12]) upkbits(xf.protection, pkd_type_prot, ( (0, 0x01, 'cell_locked'), (1, 0x02, 'formula_hidden'), )) upkbits( xf, pkd_type_prot, ( (2, 0x0004, 'is_style'), (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known. )) upkbits(xf.alignment, pkd_align_par, ( (0, 0x07, 'hor_align'), (3, 0x08, 'text_wrapped'), )) upkbits(xf, pkd_align_par, ((4, 0xFFF0, 'parent_style_index'), )) reg = pkd_used >> 2 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, reg & 1) reg >>= 1 upkbits(xf.background, pkd_bkg_34, ( (0, 0x003F, 'fill_pattern'), (6, 0x07C0, 'pattern_colour_index'), (11, 0xF800, 'background_colour_index'), )) upkbitsL(xf.border, pkd_brd_34, ( (0, 0x00000007, 'top_line_style'), (3, 0x000000F8, 'top_colour_index'), (8, 0x00000700, 'left_line_style'), (11, 0x0000F800, 'left_colour_index'), (16, 0x00070000, 'bottom_line_style'), (19, 0x00F80000, 'bottom_colour_index'), (24, 0x07000000, 'right_line_style'), (27, 0xF8000000L, 'right_colour_index'), )) xf.alignment.vert_align = 2 # bottom xf.alignment.rotation = 0 elif bv == 21: #### Warning: incomplete treatment; formatting_info not fully supported. #### Probably need to offset incoming BIFF2 XF[n] to BIFF8-like XF[n+16], #### and create XF[0:16] like the standard ones in BIFF8 #### *AND* add 16 to all XF references in cell records :-( (xf.font_index, format_etc, halign_etc) = unpack('<BxBB', data) xf.format_key = format_etc & 0x3F upkbits(xf.protection, format_etc, ( (6, 0x40, 'cell_locked'), (7, 0x80, 'formula_hidden'), )) upkbits(xf.alignment, halign_etc, ((0, 0x07, 'hor_align'), )) for mask, side in ((0x08, 'left'), (0x10, 'right'), (0x20, 'top'), (0x40, 'bottom')): if halign_etc & mask: colour_index, line_style = 8, 1 # black, thin else: colour_index, line_style = 0, 0 # none, none setattr(xf.border, side + '_colour_index', colour_index) setattr(xf.border, side + '_line_style', line_style) bg = xf.background if halign_etc & 0x80: bg.fill_pattern = 17 else: bg.fill_pattern = 0 bg.background_colour_index = 9 # white bg.pattern_colour_index = 8 # black xf.parent_style_index = 0 # ??????????? xf.alignment.vert_align = 2 # bottom xf.alignment.rotation = 0 for attr_stem in \ "format font alignment border background protection".split(): attr = "_" + attr_stem + "_flag" setattr(xf, attr, 1) else: raise XLRDError('programmer stuff-up: bv=%d' % bv) xf.xf_index = len(self.xf_list) self.xf_list.append(xf) self.xfcount += 1 if blah: xf.dump( self.logfile, header="--- handle_xf: xf[%d] ---" % xf.xf_index, footer=" ", ) # Now for some assertions ... if self.formatting_info: if self.verbosity and xf.is_style and xf.parent_style_index != 0x0FFF: msg = "WARNING *** XF[%d] is a style XF but parent_style_index is 0x%04x, not 0x0fff\n" fprintf(self.logfile, msg, xf.xf_index, xf.parent_style_index) check_colour_indexes_in_obj(self, xf, xf.xf_index) if not self.format_map.has_key(xf.format_key): msg = "WARNING *** XF[%d] unknown (raw) format key (%d, 0x%04x)\n" if self.verbosity: fprintf(self.logfile, msg, xf.xf_index, xf.format_key, xf.format_key) xf.format_key = 0