Пример #1
0
def read_properties_core(xml_source):
    """Read assorted file properties."""
    properties = DocumentProperties()
    root = fromstring(xml_source)
    creator_node = root.find(QName(NAMESPACES['dc'], 'creator').text)
    if creator_node is not None:
        properties.creator = creator_node.text
    else:
        properties.creator = ''
    last_modified_by_node = root.find(
        QName(NAMESPACES['cp'], 'lastModifiedBy').text)
    if last_modified_by_node is not None:
        properties.last_modified_by = last_modified_by_node.text
    else:
        properties.last_modified_by = ''

    created_node = root.find(QName(NAMESPACES['dcterms'], 'created').text)
    if created_node is not None:
        properties.created = W3CDTF_to_datetime(created_node.text)
    else:
        properties.created = datetime.datetime.now()

    modified_node = root.find(QName(NAMESPACES['dcterms'], 'modified').text)
    if modified_node is not None:
        properties.modified = W3CDTF_to_datetime(modified_node.text)
    else:
        properties.modified = properties.created

    return properties
Пример #2
0
def get_text(xmlns, rich_node):
    """Read rich text, discarding formatting if not disallowed"""
    text_node = rich_node.find(QName(xmlns, 't').text)
    partial_text = text_node.text or unicode('')

    if text_node.get(QName(NAMESPACES['xml'], 'space').text) != 'preserve':
        partial_text = partial_text.strip()
    return unicode(partial_text)
Пример #3
0
def parse_custom_num_formats(root, xmlns):
    """Read in custom numeric formatting rules from the shared style table"""
    custom_formats = {}
    num_fmts = root.find(QName(xmlns, 'numFmts').text)
    if num_fmts is not None:
        num_fmt_nodes = num_fmts.findall(QName(xmlns, 'numFmt').text)
        for num_fmt_node in num_fmt_nodes:
            custom_formats[int(num_fmt_node.get('numFmtId'))] = \
                    num_fmt_node.get('formatCode').lower()
    return custom_formats
Пример #4
0
def parse_fonts(root, xmlns, color_index):
    """Read in the fonts"""
    font_list = []
    fonts = root.find(QName(xmlns, 'fonts').text)
    if fonts is not None:
        font_nodes = fonts.findall(QName(xmlns, 'font').text)
        for font_node in font_nodes:
            font = Font()
            font.size = font_node.find(QName(xmlns, 'sz').text).get('val')
            font.name = font_node.find(QName(xmlns, 'name').text).get('val')
            font.bold = True if len(font_node.findall(QName(
                xmlns, 'b').text)) else False
            font.italic = True if len(font_node.findall(
                QName(xmlns, 'i').text)) else False
            if len(font_node.findall(QName(xmlns, 'u').text)):
                underline = font_node.find(QName(xmlns, 'u').text).get('val')
                font.underline = underline if underline else 'single'
            color = font_node.find(QName(xmlns, 'color').text)
            if color is not None:
                if color.get('indexed') is not None and 0 <= int(
                        color.get('indexed')) < len(color_index):
                    font.color.index = color_index[int(color.get('indexed'))]
                elif color.get('theme') is not None:
                    if color.get('tint') is not None:
                        font.color.index = 'theme:%s:%s' % (color.get('theme'),
                                                            color.get('tint'))
                    else:
                        font.color.index = 'theme:%s:' % color.get(
                            'theme')  # prefix color with theme
                elif color.get('rgb'):
                    font.color.index = color.get('rgb')
            font_list.append(font)
    return font_list
Пример #5
0
def parse_color_index(root, xmlns):
    """Read in the list of indexed colors"""
    color_index = []
    colors = root.find(QName(xmlns, 'colors').text)
    if colors is not None:
        indexedColors = colors.find(QName(xmlns, 'indexedColors').text)
        if indexedColors is not None:
            color_nodes = indexedColors.findall(QName(xmlns, 'rgbColor').text)
            for color_node in color_nodes:
                color_index.append(color_node.get('rgb'))
    return color_index
Пример #6
0
def parse_fills(root, xmlns, color_index):
    """Read in the list of fills"""
    fill_list = []
    fills = root.find(QName(xmlns, 'fills').text)
    count = 0
    if fills is not None:
        fillNodes = fills.findall(QName(xmlns, 'fill').text)
        for fill in fillNodes:
            newFill = Fill()
            # Rotation is unset
            patternFill = fill.find(QName(xmlns, 'patternFill').text)
            if patternFill is not None:
                newFill.fill_type = patternFill.get('patternType')

                fgColor = patternFill.find(QName(xmlns, 'fgColor').text)
                if fgColor is not None:
                    if fgColor.get('indexed') is not None and 0 <= int(
                            fgColor.get('indexed')) < len(color_index):
                        newFill.start_color.index = color_index[int(
                            fgColor.get('indexed'))]
                    elif fgColor.get('theme') is not None:
                        if fgColor.get('tint') is not None:
                            newFill.start_color.index = 'theme:%s:%s' % (
                                fgColor.get('theme'), fgColor.get('tint'))
                        else:
                            newFill.start_color.index = 'theme:%s:' % fgColor.get(
                                'theme')  # prefix color with theme
                    else:
                        newFill.start_color.index = fgColor.get('rgb')

                bgColor = patternFill.find(QName(xmlns, 'bgColor').text)
                if bgColor is not None:
                    if bgColor.get('indexed') is not None and 0 <= int(
                            bgColor.get('indexed')) < len(color_index):
                        newFill.end_color.index = color_index[int(
                            bgColor.get('indexed'))]
                    elif bgColor.get('theme') is not None:
                        if bgColor.get('tint') is not None:
                            newFill.end_color.index = 'theme:%s:%s' % (
                                bgColor.get('theme'), bgColor.get('tint'))
                        else:
                            newFill.end_color.index = 'theme:%s:' % bgColor.get(
                                'theme')  # prefix color with theme
                    elif bgColor.get('rgb'):
                        newFill.end_color.index = bgColor.get('rgb')
            count += 1
            fill_list.append(newFill)
    return fill_list
Пример #7
0
def read_sheets_titles(xml_source):
    """Read titles for all sheets."""
    root = fromstring(xml_source)
    titles_root = root.find(
        QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main',
              'sheets').text)

    return [sheet.get('name') for sheet in titles_root.getchildren()]
Пример #8
0
def read_excel_base_date(xml_source):
    root = fromstring(text=xml_source)
    wbPr = root.find(
        QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main',
              'workbookPr').text)
    if ('date1904' in wbPr.keys()
            and wbPr.attrib['date1904'] in ('1', 'true')):
        return CALENDAR_MAC_1904

    return CALENDAR_WINDOWS_1900
Пример #9
0
def parse_borders(root, xmlns, color_index):
    """Read in the boarders"""
    border_list = []
    borders = root.find(QName(xmlns, 'borders').text)
    if borders is not None:
        boarderNodes = borders.findall(QName(xmlns, 'border').text)
        count = 0
        for boarder in boarderNodes:
            newBorder = Borders()
            if boarder.get('diagonalup') == 1:
                newBorder.diagonal_direction = newBorder.DIAGONAL_UP
            if boarder.get('diagonalDown') == 1:
                if newBorder.diagonal_direction == newBorder.DIAGONAL_UP:
                    newBorder.diagonal_direction = newBorder.DIAGONAL_BOTH
                else:
                    newBorder.diagonal_direction = newBorder.DIAGONAL_DOWN

            for side in ('left', 'right', 'top', 'bottom', 'diagonal'):
                node = boarder.find(QName(xmlns, side).text)
                if node is not None:
                    borderSide = getattr(newBorder, side)
                    if node.get('style') is not None:
                        borderSide.border_style = node.get('style')
                    color = node.find(QName(xmlns, 'color').text)
                    if color is not None:
                        # Ignore 'auto'
                        if color.get('indexed') is not None and 0 <= int(
                                color.get('indexed')) < len(color_index):
                            borderSide.color.index = color_index[int(
                                color.get('indexed'))]
                        elif color.get('theme') is not None:
                            if color.get('tint') is not None:
                                borderSide.color.index = 'theme:%s:%s' % (
                                    color.get('theme'), color.get('tint'))
                            else:
                                borderSide.color.index = 'theme:%s:' % color.get(
                                    'theme')  # prefix color with theme
                        elif color.get('rgb'):
                            borderSide.color.index = color.get('rgb')
            count += 1
            border_list.append(newBorder)

    return border_list
Пример #10
0
def get_string(xmlns, string_index_node):
    """Read the contents of a specific string index"""
    rich_nodes = string_index_node.findall(QName(xmlns, 'r').text)
    if rich_nodes:
        reconstructed_text = []
        for rich_node in rich_nodes:
            partial_text = get_text(xmlns, rich_node)
            reconstructed_text.append(partial_text)
        return unicode(''.join(reconstructed_text))
    else:
        return get_text(xmlns, string_index_node)
Пример #11
0
def read_named_ranges(xml_source, workbook):
    """Read named ranges, excluding poorly defined ranges."""
    named_ranges = []
    root = fromstring(xml_source)
    names_root = root.find(
        QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main',
              'definedNames').text)
    if names_root is not None:

        for name_node in names_root.getchildren():
            range_name = name_node.get('name')

            if name_node.get("hidden", '0') == '1':
                continue

            valid = True

            for discarded_range in DISCARDED_RANGES:
                if discarded_range in range_name:
                    valid = False

            for bad_range in BUGGY_NAMED_RANGES:
                if bad_range in name_node.text:
                    valid = False

            if valid:
                if refers_to_range(name_node.text):
                    destinations = split_named_range(name_node.text)

                    new_destinations = []
                    for worksheet, cells_range in destinations:
                        # it can happen that a valid named range references
                        # a missing worksheet, when Excel didn't properly maintain
                        # the named range list
                        #
                        # we just ignore them here
                        worksheet = workbook.get_sheet_by_name(worksheet)
                        if worksheet:
                            new_destinations.append((worksheet, cells_range))

                    named_range = NamedRange(range_name, new_destinations)
                else:
                    named_range = NamedRangeContainingValue(
                        range_name, name_node.text)

                location_id = name_node.get("localSheetId")
                if location_id:
                    named_range.scope = workbook.worksheets[int(location_id)]

                named_ranges.append(named_range)

    return named_ranges
Пример #12
0
def read_string_table(xml_source):
    """Read in all shared strings in the table"""
    table = {}
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(text=xml_source)
    string_index_nodes = root.findall(QName(xmlns, 'si').text)
    for index, string_index_node in enumerate(string_index_nodes):

        string = get_string(xmlns, string_index_node)

        # fix XML escaping sequence for '_x'
        string = string.replace('x005F_', '')

        table[index] = string

    return table
Пример #13
0
def read_style_table(xml_source):
    """Read styles from the shared style table"""
    table = {}
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)
    custom_num_formats = parse_custom_num_formats(root, xmlns)
    color_index = parse_color_index(root, xmlns)
    font_list = parse_fonts(root, xmlns, color_index)
    fill_list = parse_fills(root, xmlns, color_index)
    border_list = parse_borders(root, xmlns, color_index)
    builtin_formats = NumberFormat._BUILTIN_FORMATS
    cell_xfs = root.find(QName(xmlns, 'cellXfs').text)
    if cell_xfs is not None:  # can happen on bad OOXML writers (e.g. Gnumeric)
        cell_xfs_nodes = cell_xfs.findall(QName(xmlns, 'xf').text)
        for index, cell_xfs_node in enumerate(cell_xfs_nodes):
            new_style = Style()
            number_format_id = int(cell_xfs_node.get('numFmtId'))
            if number_format_id < 164:
                new_style.number_format.format_code = \
                        builtin_formats.get(number_format_id, 'General')
            else:

                if number_format_id in custom_num_formats:
                    new_style.number_format.format_code = \
                            custom_num_formats[number_format_id]
                else:
                    raise MissingNumberFormat('%s' % number_format_id)

            if cell_xfs_node.get('applyAlignment') == '1':
                alignment = cell_xfs_node.find(QName(xmlns, 'alignment').text)
                if alignment is not None:
                    if alignment.get('horizontal') is not None:
                        new_style.alignment.horizontal = alignment.get(
                            'horizontal')
                    if alignment.get('vertical') is not None:
                        new_style.alignment.vertical = alignment.get(
                            'vertical')
                    if alignment.get('wrapText'):
                        new_style.alignment.wrap_text = True
                    if alignment.get('shrinkToFit'):
                        new_style.alignment.shrink_to_fit = True
                    if alignment.get('indent') is not None:
                        new_style.alignment.ident = int(
                            alignment.get('indent'))
                    if alignment.get('textRotation') is not None:
                        new_style.alignment.text_rotation = int(
                            alignment.get('textRotation'))
                    # ignore justifyLastLine option when horizontal = distributed

            if cell_xfs_node.get('applyFont') == '1':
                new_style.font = deepcopy(font_list[int(
                    cell_xfs_node.get('fontId'))])
                new_style.font.color = deepcopy(font_list[int(
                    cell_xfs_node.get('fontId'))].color)

            if cell_xfs_node.get('applyFill') == '1':
                new_style.fill = deepcopy(fill_list[int(
                    cell_xfs_node.get('fillId'))])
                new_style.fill.start_color = deepcopy(fill_list[int(
                    cell_xfs_node.get('fillId'))].start_color)
                new_style.fill.end_color = deepcopy(fill_list[int(
                    cell_xfs_node.get('fillId'))].end_color)

            if cell_xfs_node.get('applyBorder') == '1':
                new_style.borders = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))])
                new_style.borders.left = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].left)
                new_style.borders.left.color = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].left.color)
                new_style.borders.right = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].right)
                new_style.borders.right.color = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].right.color)
                new_style.borders.top = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].top)
                new_style.borders.top.color = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].top.color)
                new_style.borders.bottom = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].bottom)
                new_style.borders.bottom.color = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].bottom.color)
                new_style.borders.diagonal = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].diagonal)
                new_style.borders.diagonal.color = deepcopy(border_list[int(
                    cell_xfs_node.get('borderId'))].diagonal.color)

            if cell_xfs_node.get('applyProtection') == '1':
                protection = cell_xfs_node.find(
                    QName(xmlns, 'protection').text)
                # Ignore if there are no protection sub-nodes
                if protection is not None:
                    if protection.get('locked') is not None:
                        if protection.get('locked') == '1':
                            new_style.protection.locked = Protection.PROTECTION_PROTECTED
                        else:
                            new_style.protection.locked = Protection.PROTECTION_UNPROTECTED
                    if protection.get('hidden') is not None:
                        if protection.get('hidden') == '1':
                            new_style.protection.hidden = Protection.PROTECTION_PROTECTED
                        else:
                            new_style.protection.hidden = Protection.PROTECTION_UNPROTECTED

            table[index] = new_style
    return table
Пример #14
0
def fast_parse(ws, xml_source, string_table, style_table):

    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    root = fromstring(xml_source)

    mergeCells = root.find(QName(xmlns, 'mergeCells').text)
    if mergeCells is not None:
        mergeCellNodes = mergeCells.findall(QName(xmlns, 'mergeCell').text)
        for mergeCell in mergeCellNodes:
            ws.merge_cells(mergeCell.get('ref'))

    source = _get_xml_iter(xml_source)

    it = iterparse(source)

    for event, element in filter(filter_cells, it):

        value = element.findtext('{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v')

        coordinate = element.get('r')
        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        if value is not None:
            data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                value = string_table.get(int(value))

            ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find(QName(xmlns, 'cols').text)
    if cols is not None:
        colNodes = cols.findall(QName(xmlns, 'col').text)
        for col in colNodes:
            min = int(col.get('min')) if col.get('min') else 1
            max = int(col.get('max')) if col.get('max') else 1
            for colId in range(min, max + 1):
                column = get_column_letter(colId)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                if col.get('width') is not None:
                    ws.column_dimensions[column].width = float(col.get('width'))
                if col.get('bestFit') == '1':
                    ws.column_dimensions[column].auto_size = True
                if col.get('hidden') == '1':
                    ws.column_dimensions[column].visible = False
                if col.get('outlineLevel') is not None:
                    ws.column_dimensions[column].outline_level = int(col.get('outlineLevel'))
                if col.get('collapsed') == '1':
                    ws.column_dimensions[column].collapsed = True
                if col.get('style') is not None:
                    ws.column_dimensions[column].style_index = col.get('style')

    printOptions = root.find(QName(xmlns, 'printOptions').text)
    if printOptions is not None:
        if printOptions.get('horizontalCentered') is not None:
            ws.page_setup.horizontalCentered = printOptions.get('horizontalCentered')
        if printOptions.get('verticalCentered') is not None:
            ws.page_setup.verticalCentered = printOptions.get('verticalCentered')

    pageMargins = root.find(QName(xmlns, 'pageMargins').text)
    if pageMargins is not None:
        if pageMargins.get('left') is not None:
            ws.page_margins.left = float(pageMargins.get('left'))
        if pageMargins.get('right') is not None:
            ws.page_margins.right = float(pageMargins.get('right'))
        if pageMargins.get('top') is not None:
            ws.page_margins.top = float(pageMargins.get('top'))
        if pageMargins.get('bottom') is not None:
            ws.page_margins.bottom = float(pageMargins.get('bottom'))
        if pageMargins.get('header') is not None:
            ws.page_margins.header = float(pageMargins.get('header'))
        if pageMargins.get('footer') is not None:
            ws.page_margins.footer = float(pageMargins.get('footer'))

    pageSetup = root.find(QName(xmlns, 'pageSetup').text)
    if pageSetup is not None:
        if pageSetup.get('orientation') is not None:
            ws.page_setup.orientation = pageSetup.get('orientation')
        if pageSetup.get('paperSize') is not None:
            ws.page_setup.paperSize = pageSetup.get('paperSize')
        if pageSetup.get('scale') is not None:
            ws.page_setup.top = pageSetup.get('scale')
        if pageSetup.get('fitToPage') is not None:
            ws.page_setup.fitToPage = pageSetup.get('fitToPage')
        if pageSetup.get('fitToHeight') is not None:
            ws.page_setup.fitToHeight = pageSetup.get('fitToHeight')
        if pageSetup.get('fitToWidth') is not None:
            ws.page_setup.fitToWidth = pageSetup.get('fitToWidth')
        if pageSetup.get('firstPageNumber') is not None:
            ws.page_setup.firstPageNumber = pageSetup.get('firstPageNumber')
        if pageSetup.get('useFirstPageNumber') is not None:
            ws.page_setup.useFirstPageNumber = pageSetup.get('useFirstPageNumber')

    headerFooter = root.find(QName(xmlns, 'headerFooter').text)
    if headerFooter is not None:
        oddHeader = headerFooter.find(QName(xmlns, 'oddHeader').text)
        if oddHeader is not None:
            ws.header_footer.setHeader(oddHeader.text)
        oddFooter = headerFooter.find(QName(xmlns, 'oddFooter').text)
        if oddFooter is not None:
            ws.header_footer.setFooter(oddFooter.text)