Exemplo n.º 1
0
    def get_cells(self, min_row, min_col, max_row, max_col):
        """Yield a ``RawCell`` for every cell inside the requested bounds.

        The worksheet XML in ``self.xml_source`` is streamed with
        ``iterparse``.  A cell is yielded when its column index lies in
        ``[min_col, max_col]`` and its row number lies in
        ``[min_row, max_row]`` (both ranges inclusive).

        When the cell has a formula and ``self.parent.data_only`` is false,
        the yielded value is ``"=" + formula`` and the data type is
        ``Cell.TYPE_FORMULA``; otherwise the ``v`` text and the ``t``
        attribute (default ``'n'``) are reported.
        """
        parser = iterparse(self.xml_source)

        cell_tag = '{%s}c' % SHEET_MAIN_NS
        value_tag = '{%s}v' % SHEET_MAIN_NS
        formula_tag = '{%s}f' % SHEET_MAIN_NS

        for _event, node in parser:
            tag = node.tag

            if tag == value_tag or tag == formula_tag:
                # Value and formula children are read later through
                # findtext() on their parent cell, so they are left alone.
                continue

            if tag == cell_tag:
                coord = node.get('r')
                column_str, row_str = RE_COORDINATE.match(coord).groups()
                row = int(row_str)
                column = column_index_from_string(column_str)

                inside_columns = min_col <= column <= max_col
                if inside_columns and min_row <= row <= max_row:
                    data_type = node.get('t', 'n')
                    style_id = node.get('s')
                    formula = node.findtext(formula_tag)
                    value = node.findtext(value_tag)
                    if formula is not None and not self.parent.data_only:
                        data_type = Cell.TYPE_FORMULA
                        value = "=" + formula
                    yield RawCell(row, column_str, coord, value,
                                  data_type, style_id, None)

            # Everything except v/f elements is released once handled.
            node.clear()
Exemplo n.º 2
0
def fast_parse(ws, xml_source, string_table, style_table):
    """Stream cell values and styles from worksheet XML into ``ws``.

    Only elements accepted by ``filter_cells`` are visited.  A cell without
    a ``v`` child is ignored (neither its value nor its style is stored).

    :param ws: worksheet receiving values (``ws.cell(...)``) and styles
        (``ws._styles``)
    :param xml_source: worksheet XML, handed to ``_get_xml_iter``
    :param string_table: looked up with the integer cell text when the cell
        type equals ``Cell.TYPE_STRING``
    :param style_table: looked up with the integer ``s`` attribute
    """
    value_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v'

    stream = _get_xml_iter(xml_source)
    parser = iterparse(stream)

    for _event, cell_node in ifilter(filter_cells, parser):
        content = cell_node.findtext(value_tag)

        if content is not None:
            coordinate = cell_node.get('r')
            data_type = cell_node.get('t', 'n')
            style_id = cell_node.get('s')

            if data_type == Cell.TYPE_STRING:
                # the cell text is an index into the string table
                content = string_table.get(int(content))

            ws.cell(coordinate).value = content

            if style_id is not None:
                ws._styles[coordinate] = style_table.get(int(style_id))

        # release the element so the streamed tree does not grow
        cell_node.clear()
Exemplo n.º 3
0
def iter_rows(workbook_name, sheet_name, xml_source, range_string='', row_offset=0, column_offset=0):
    """Return the result of ``get_squared_range`` over the worksheet XML.

    :param workbook_name: passed to ``get_archive_file`` to open the archive
        holding the shared strings and the style table
    :param sheet_name: not used by this function
    :param xml_source: seekable worksheet XML source; rewound before parsing
    :param range_string: explicit range; when empty the bounds come from
        ``read_dimension``
    :param row_offset: forwarded to ``get_range_boundaries``
    :param column_offset: forwarded to ``get_range_boundaries``
    """
    archive = get_archive_file(workbook_name)

    if not range_string:
        first_col, min_row, last_col, max_row = read_dimension(xml_source=xml_source)
        min_col = column_index_from_string(first_col)
        # NOTE(review): the upper bounds are padded by one column and six
        # rows; the reason is not visible in this function.
        max_col = column_index_from_string(last_col) + 1
        max_row += 6
    else:
        min_col, min_row, max_col, max_row = get_range_boundaries(range_string, row_offset, column_offset)

    # A missing shared-strings entry (KeyError) yields an empty table.
    try:
        string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
    except KeyError:
        string_table = {}

    style_table = read_style_table(archive.read(ARC_STYLE))

    xml_source.seek(0)
    parser = iterparse(xml_source)

    return get_squared_range(parser, min_col, min_row, max_col, max_row, string_table, style_table)
Exemplo n.º 4
0
def iter_rows(workbook_name,
              sheet_name,
              xml_source,
              shared_date,
              string_table,
              range_string='',
              row_offset=0,
              column_offset=0):
    """Return the result of ``get_squared_range`` over the worksheet XML.

    :param workbook_name: passed to ``get_archive_file`` to open the archive
        holding the style table
    :param sheet_name: not used by this function
    :param xml_source: seekable worksheet XML source; rewound before parsing
    :param shared_date: forwarded unchanged to ``get_squared_range``
    :param string_table: forwarded unchanged to ``get_squared_range``
    :param range_string: explicit range; when empty the bounds come from
        ``read_dimension``
    :param row_offset: forwarded to ``get_range_boundaries``
    :param column_offset: forwarded to ``get_range_boundaries``
    """
    archive = get_archive_file(workbook_name)

    if not range_string:
        first_col, min_row, last_col, max_row = read_dimension(
            xml_source=xml_source)
        min_col = column_index_from_string(first_col)
        # NOTE(review): the upper bounds are padded by one column and six
        # rows; the reason is not visible in this function.
        max_col = column_index_from_string(last_col) + 1
        max_row += 6
    else:
        bounds = get_range_boundaries(range_string, row_offset, column_offset)
        min_col, min_row, max_col, max_row = bounds

    style_table = read_style_table(archive.read(ARC_STYLE))

    xml_source.seek(0)
    parser = iterparse(xml_source)

    return get_squared_range(parser, min_col, min_row, max_col, max_row,
                             string_table, style_table, shared_date)
Exemplo n.º 5
0
def read_dimension(xml_source):
    """Return ``(min_col, min_row, max_col, max_row)`` for a worksheet.

    If the XML contains a ``dimension`` element, its ``ref`` attribute is
    split on ``:`` and both ends are converted with
    ``coordinate_from_string``; that result is returned immediately.

    Otherwise the bounds are estimated from the ``row`` elements: the first
    and last row numbers seen, and the smallest/largest column numbers
    found in the rows' ``spans`` attributes.  An "Unsized worksheet"
    warning is issued and the columns are converted with
    ``get_column_letter``.

    ``spans`` is optional and may list several space-separated ``lo:hi``
    ranges; rows without it do not contribute to the column bounds.  When
    no row (or no span) is found at all, the missing bound defaults to 1,
    i.e. the single cell A1 for a sheet without rows.
    """
    dimension_tag = '{%s}dimension' % SHEET_MAIN_NS
    row_tag = '{%s}row' % SHEET_MAIN_NS

    min_row = min_col = max_row = max_col = None
    source = _get_xml_iter(xml_source)
    it = iterparse(source)
    for event, el in it:
        if el.tag == dimension_tag:
            dim = el.get("ref")
            if ':' in dim:
                start, stop = dim.split(':')
            else:
                start = stop = dim
            min_col, min_row = coordinate_from_string(start)
            max_col, max_row = coordinate_from_string(stop)
            return min_col, min_row, max_col, max_row

        if el.tag == row_tag:
            row = int(el.get("r"))
            if min_row is None:
                min_row = row
            # Track the last row here instead of relying on the loop
            # variable surviving the loop (it does not exist if the sheet
            # has no rows).
            max_row = row
            for span in (el.get("spans") or "").split():
                start, stop = span.split(":")
                if min_col is None:
                    min_col = int(start)
                    max_col = int(stop)
                else:
                    min_col = min(min_col, int(start))
                    max_col = max(max_col, int(stop))

        # Only attributes of row elements are needed, so every element can
        # be released as soon as its end event has been handled.
        el.clear()

    warn("Unsized worksheet")
    if min_row is None:
        min_row = max_row = 1
    if min_col is None:
        min_col = max_col = 1
    return get_column_letter(min_col), min_row, get_column_letter(max_col), max_row
Exemplo n.º 6
0
def read_dimension(xml_source):
    """Return the bounds given by the worksheet's ``dimension`` element.

    The ``ref`` attribute is either a single coordinate or two coordinates
    joined by ``:``; each end is converted with ``coordinate_from_string``.

    :returns: ``(min_col, min_row, max_col, max_row)``, or ``None`` when
        the XML holds no ``dimension`` element
    """
    dimension_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}dimension'

    stream = _get_xml_iter(xml_source)

    for _event, node in iterparse(stream):
        if node.tag != dimension_tag:
            # not interesting: release it and keep scanning
            node.clear()
            continue

        ref = node.get('ref')
        if ':' in ref:
            first, last = ref.split(':')
        else:
            first = last = ref

        min_col, min_row = coordinate_from_string(first)
        max_col, max_row = coordinate_from_string(last)
        return min_col, min_row, max_col, max_row

    return None
Exemplo n.º 7
0
def fast_parse(ws, xml_source, string_table, style_table):
    """Copy cell values and styles from worksheet XML into ``ws``.

    The XML is streamed and only elements accepted by ``filter_cells`` are
    handled.  Cells that have no ``v`` child are skipped entirely.

    :param ws: worksheet; values are assigned through ``ws.cell(...)`` and
        styles are stored in ``ws._styles``
    :param xml_source: worksheet XML, handed to ``_get_xml_iter``
    :param string_table: used to translate the cell text (as an integer
        index) when the cell type equals ``Cell.TYPE_STRING``
    :param style_table: used to translate the integer ``s`` attribute
    """
    v_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v'

    xml_stream = _get_xml_iter(xml_source)
    events = iterparse(xml_stream)

    for _event, node in ifilter(filter_cells, events):
        text = node.findtext(v_tag)

        if text is not None:
            coordinate = node.get('r')
            is_shared_string = node.get('t', 'n') == Cell.TYPE_STRING
            style_id = node.get('s')

            if is_shared_string:
                text = string_table.get(int(text))

            ws.cell(coordinate).value = text

            if style_id is not None:
                ws._styles[coordinate] = style_table.get(int(style_id))

        # clear the handled element to keep memory use flat
        node.clear()
Exemplo n.º 8
0
def read_dimension(xml_source):
    """Look up the worksheet bounds in its ``dimension`` element.

    Elements are streamed and cleared until a ``dimension`` element is
    reached.  Its ``ref`` attribute (one coordinate, or two joined by
    ``:``) is converted with ``coordinate_from_string``.

    :returns: ``(min_col, min_row, max_col, max_row)``, or ``None`` if no
        ``dimension`` element is present
    """
    wanted = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}dimension'

    source = _get_xml_iter(xml_source)
    parser = iterparse(source)

    for _event, item in parser:
        if item.tag == wanted:
            ref = item.get('ref')

            # a single-cell reference is both the lower and the upper bound
            ends = ref.split(':') if ':' in ref else (ref, ref)
            lower, upper = ends

            min_col, min_row = coordinate_from_string(lower)
            max_col, max_row = coordinate_from_string(upper)
            return min_col, min_row, max_col, max_row

        item.clear()

    return None
Exemplo n.º 9
0
def read_dimension(xml_source):
    """Return ``(min_col, min_row, max_col, max_row)`` for a worksheet.

    When a ``dimension`` element is found, its ``ref`` attribute (one
    coordinate, or two joined by ``:``) is converted with
    ``coordinate_from_string`` and returned at once.

    If the document ends without one, the smallest and largest column and
    row observed on the ``c`` (cell) elements are returned instead; all
    four values are ``None`` when no cell was seen either.

    Columns are ordered as spreadsheet columns (``'Z'`` before ``'AA'``),
    not as plain strings.
    """
    namespace = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}'
    dimension_tag = namespace + 'dimension'
    cell_tag = namespace + 'c'

    def column_order(letters):
        # Assumes coordinate_from_string yields column letters such as
        # 'A' or 'AA': a shorter name is always further left, and names of
        # equal length sort alphabetically.
        return len(letters), letters

    source = _get_xml_iter(xml_source)

    it = iterparse(source)

    smax_col = None
    smax_row = None
    smin_col = None
    smin_row = None

    for event, element in it:

        if element.tag == dimension_tag:
            ref = element.get('ref')

            if ':' in ref:
                min_range, max_range = ref.split(':')
            else:
                min_range = max_range = ref

            min_col, min_row = coordinate_from_string(min_range)
            max_col, max_row = coordinate_from_string(max_range)

            return min_col, min_row, max_col, max_row

        if element.tag == cell_tag:
            # Supposedly the dimension is mandatory, but in practice it can
            # be left off; in that case the observed extents are returned.
            col, row = coordinate_from_string(element.get('r'))
            if smin_row is None:
                # first cell: initialise the observed extents
                smin_col = smax_col = col
                smin_row = smax_row = row
            else:
                # widen the observed extents (fallback if no dimension)
                smin_col = min(smin_col, col, key=column_order)
                smin_row = min(smin_row, row)
                smax_col = max(smax_col, col, key=column_order)
                smax_row = max(smax_row, row)
        else:
            element.clear()

    return smin_col, smin_row, smax_col, smax_row
Exemplo n.º 10
0
def read_dimension(xml_source):
    """Return ``(min_col, min_row, max_col, max_row)`` for a worksheet.

    The bounds come from the ``ref`` attribute of the ``dimension``
    element when one exists (converted with ``coordinate_from_string``).
    Otherwise they are the extents observed on the ``c`` (cell) elements
    while scanning the whole document, or four ``None`` values if the
    document has no cells.

    Observed columns are compared in spreadsheet order, so a sheet that
    reaches past column ``Z`` reports e.g. ``'AA'`` as its maximum rather
    than ``'Z'``.
    """
    ns_prefix = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}'
    dimension_tag = ns_prefix + 'dimension'
    cell_tag = ns_prefix + 'c'

    def spreadsheet_order(column):
        # Assumes `column` is a letter name such as 'B' or 'AB'.  Plain
        # string comparison would put 'AA' before 'B', so length is
        # compared first.
        return len(column), column

    source = _get_xml_iter(xml_source)

    it = iterparse(source)

    smin_col = smax_col = None
    smin_row = smax_row = None

    for event, element in it:

        if element.tag == dimension_tag:
            ref = element.get('ref')

            if ':' in ref:
                min_range, max_range = ref.split(':')
            else:
                min_range = max_range = ref

            min_col, min_row = coordinate_from_string(min_range)
            max_col, max_row = coordinate_from_string(max_range)

            return min_col, min_row, max_col, max_row

        if element.tag == cell_tag:
            # The dimension element is supposedly mandatory but is
            # sometimes missing, so cell extents are tracked as a fallback.
            col, row = coordinate_from_string(element.get('r'))
            if smin_row is None:
                # first cell seen
                smin_col = smax_col = col
                smin_row = smax_row = row
            else:
                smin_col = min(smin_col, col, key=spreadsheet_order)
                smax_col = max(smax_col, col, key=spreadsheet_order)
                smin_row = min(smin_row, row)
                smax_row = max(smax_row, row)
        else:
            element.clear()

    return smin_col, smin_row, smax_col, smax_row
Exemplo n.º 11
0
    def parse(self):
        """Stream ``self.source`` and hand known elements to their parser.

        Each element whose tag has an entry in the dispatch table is passed
        to the matching method of this object; all other elements are
        ignored.
        """
        stream = _get_xml_iter(self.source)
        events = iterparse(stream)

        prefix = '{%s}' % SHEET_MAIN_NS
        handlers = (
            ('c', self.parse_cell),
            ('mergeCells', self.parse_merge),
            ('cols', self.parse_column_dimensions),
            ('sheetData', self.parse_row_dimensions),
            ('printOptions', self.parse_print_options),
            ('pageMargins', self.parse_margins),
            ('pageSetup', self.parse_page_setup),
            ('headerFooter', self.parse_header_footer),
            ('conditionalFormatting', self.parser_conditional_formatting),
        )
        # fully qualified tag -> bound handler
        dispatcher = dict((prefix + name, handler) for name, handler in handlers)

        for _event, element in events:
            handler = dispatcher.get(element.tag)
            if handler is not None:
                handler(element)
def iter_rows(workbook_name, sheet_name, xml_source, shared_date, string_table, range_string='', row_offset=0, column_offset=0):
    """Return the result of ``get_squared_range`` over the worksheet XML.

    :param workbook_name: passed to ``get_archive_file`` to open the archive
        holding the style data
    :param sheet_name: not used by this function
    :param xml_source: seekable worksheet XML source; rewound before parsing
    :param shared_date: forwarded unchanged to ``get_squared_range``
    :param string_table: forwarded unchanged to ``get_squared_range``
    :param range_string: explicit range; when empty the bounds come from
        ``read_dimension``
    :param row_offset: forwarded to ``get_range_boundaries``
    :param column_offset: forwarded to ``get_range_boundaries``
    """
    archive = get_archive_file(workbook_name)

    if not range_string:
        first_col, min_row, last_col, max_row = read_dimension(xml_source=xml_source)
        min_col = column_index_from_string(first_col)
        # NOTE(review): the upper bounds are padded by one column and six
        # rows; the reason is not visible in this function.
        max_col = column_index_from_string(last_col) + 1
        max_row += 6
    else:
        min_col, min_row, max_col, max_row = get_range_boundaries(range_string, row_offset, column_offset)

    # read_style_table returns a mapping here; only its 'table' entry is used
    style_properties = read_style_table(archive.read(ARC_STYLE))
    style_table = style_properties.pop('table')

    xml_source.seek(0)
    parser = iterparse(xml_source)

    return get_squared_range(parser, min_col, min_row, max_col, max_row, string_table, style_table, shared_date)
Exemplo n.º 13
0
def fast_parse(ws, xml_source, string_table, style_table):
    """Populate worksheet ``ws`` from the worksheet XML in ``xml_source``.

    The XML is read twice: once as a full tree (``fromstring``) for merged
    cells, column dimensions, print options, page margins, page setup and
    header/footer, and once as a stream (``iterparse``) for the cells
    accepted by ``filter_cells``.

    For each cell:

    * the ``s`` attribute, if present, is resolved through ``style_table``
      and stored in ``ws._styles``;
    * a cell with an ``f`` child is written as ``"=" + formula text``.  A
      formula marked ``t="shared"`` is remembered under its ``si`` index;
      a later ``f`` element without text receives that formula with its
      relative references shifted by the distance between the two cells
      (``$`` markers are kept).  A text-less formula with no recorded
      master is written as ``"="``;
    * otherwise the ``v`` text is written, translated through
      ``string_table`` when the cell type equals ``Cell.TYPE_STRING``.

    :param ws: worksheet object receiving the parsed content
    :param xml_source: worksheet XML, passed to ``fromstring`` and to
        ``_get_xml_iter``
    :param string_table: mapping from shared-string index to string
    :param style_table: mapping from style index to style
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'

    def qualified(tag):
        return QName(xmlns, tag).text

    root = fromstring(xml_source)

    merge_cells = root.find(qualified('mergeCells'))
    if merge_cells is not None:
        for merge_cell in merge_cells.findall(qualified('mergeCell')):
            ws.merge_cells(merge_cell.get('ref'))

    source = _get_xml_iter(xml_source)

    it = iterparse(source)

    # shared formula index -> {'text': formula text, 'cell': master cell}
    formula_table = {}
    import re
    from openpyxl.cell import column_index_from_string
    # groups: ('$' or '', column letters, '$' or '', row digits)
    # NOTE(review): this also matches names that end in digits inside a
    # formula (e.g. a function called LOG10) -- confirm that is acceptable.
    ALL_COORD_RE = re.compile(r'([$]?)([A-Z]+)([$]?)(\d+)')

    def joffset_cell(start_coord, stop_coord, target_coord):
        """Shift one reference by the offset from start to stop cell."""
        col_marker, col_letters, row_marker, row_digits = target_coord
        diff_col = (column_index_from_string(stop_coord[1])
                    - column_index_from_string(start_coord[1]))
        diff_row = int(stop_coord[3]) - int(start_coord[3])

        out_col = column_index_from_string(col_letters)
        out_row = int(row_digits)
        # a '$' marker pins that part of the reference
        if not col_marker:
            out_col += diff_col
        if not row_marker:
            out_row += diff_row

        return col_marker + get_column_letter(out_col) + row_marker + str(out_row)

    def joffset_formula(start, stop, formula):
        """Rewrite every reference in ``formula`` for the cell ``stop``."""
        start_coord = ALL_COORD_RE.match(start).groups()
        stop_coord = ALL_COORD_RE.match(stop).groups()
        return ALL_COORD_RE.sub(
            lambda m: joffset_cell(start_coord, stop_coord, m.groups()),
            formula)

    formula_tag = qualified('f')
    value_tag = qualified('v')

    for event, element in filter(filter_cells, it):
        formula = element.find(formula_tag)
        value = element.findtext(value_tag)

        coordinate = element.get('r')

        if formula is not None:
            formula_text = ''
            if formula.text:
                formula_text = formula.text
                if formula.get('t') == 'shared':
                    # master cell of a shared formula: remember it
                    formula_table[formula.get('si')] = {
                        'text': formula_text, 'cell': coordinate}
            else:
                shared_formula = formula_table.get(formula.get('si'))
                if shared_formula is not None:
                    formula_text = joffset_formula(shared_formula['cell'],
                                                   coordinate,
                                                   shared_formula['text'])

            value = '=' + formula_text

        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        if value is not None:
            if formula is not None:
                # formula text must never be used as a string-table index
                data_type = 'f'
            else:
                data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                value = string_table.get(int(value))

            ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find(qualified('cols'))
    if cols is not None:
        for col in cols.findall(qualified('col')):
            first = int(col.get('min')) if col.get('min') else 1
            last = int(col.get('max')) if col.get('max') else 1
            for col_id in range(first, last + 1):
                column = get_column_letter(col_id)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if col.get('width') is not None:
                    dimension.width = float(col.get('width'))
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if col.get('outlineLevel') is not None:
                    dimension.outline_level = int(col.get('outlineLevel'))
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if col.get('style') is not None:
                    dimension.style_index = col.get('style')

    print_options = root.find(qualified('printOptions'))
    if print_options is not None:
        for name in ('horizontalCentered', 'verticalCentered'):
            setting = print_options.get(name)
            if setting is not None:
                setattr(ws.page_setup, name, setting)

    page_margins = root.find(qualified('pageMargins'))
    if page_margins is not None:
        for side in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            size = page_margins.get(side)
            if size is not None:
                setattr(ws.page_margins, side, float(size))

    page_setup = root.find(qualified('pageSetup'))
    if page_setup is not None:
        # every XML attribute is stored under the same name on page_setup
        for name in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            setting = page_setup.get(name)
            if setting is not None:
                setattr(ws.page_setup, name, setting)

    header_footer = root.find(qualified('headerFooter'))
    if header_footer is not None:
        odd_header = header_footer.find(qualified('oddHeader'))
        if odd_header is not None and odd_header.text is not None:
            ws.header_footer.setHeader(odd_header.text)
        odd_footer = header_footer.find(qualified('oddFooter'))
        if odd_footer is not None and odd_footer.text is not None:
            ws.header_footer.setFooter(odd_footer.text)
Exemplo n.º 14
0
def fast_parse(ws, xml_source, string_table, style_table):
    """Populate worksheet ``ws`` from the worksheet XML in ``xml_source``.

    The XML is read twice: once as a full tree (``fromstring``) for merged
    cells, column dimensions, print options, page margins, page setup and
    header/footer, and once as a stream (``iterparse``) for the cells
    accepted by ``filter_cells``.

    For each cell the ``s`` attribute, if present, is resolved through
    ``style_table`` and stored in ``ws._styles``; the ``v`` text, if
    present, is written to the cell, translated through ``string_table``
    when the cell type equals ``Cell.TYPE_STRING``.

    :param ws: worksheet object receiving the parsed content
    :param xml_source: worksheet XML, passed to ``fromstring`` and to
        ``_get_xml_iter``
    :param string_table: mapping from shared-string index to string
    :param style_table: mapping from style index to style
    """
    xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'

    def qualified(tag):
        return QName(xmlns, tag).text

    root = fromstring(xml_source)

    merge_cells = root.find(qualified('mergeCells'))
    if merge_cells is not None:
        for merge_cell in merge_cells.findall(qualified('mergeCell')):
            ws.merge_cells(merge_cell.get('ref'))

    source = _get_xml_iter(xml_source)

    it = iterparse(source)

    value_tag = qualified('v')

    for event, element in filter(filter_cells, it):

        value = element.findtext(value_tag)

        coordinate = element.get('r')
        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        if value is not None:
            data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                # the cell text is an index into the string table
                value = string_table.get(int(value))

            ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find(qualified('cols'))
    if cols is not None:
        for col in cols.findall(qualified('col')):
            first = int(col.get('min')) if col.get('min') else 1
            last = int(col.get('max')) if col.get('max') else 1
            for col_id in range(first, last + 1):
                column = get_column_letter(col_id)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if col.get('width') is not None:
                    dimension.width = float(col.get('width'))
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if col.get('outlineLevel') is not None:
                    dimension.outline_level = int(col.get('outlineLevel'))
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if col.get('style') is not None:
                    dimension.style_index = col.get('style')

    print_options = root.find(qualified('printOptions'))
    if print_options is not None:
        for name in ('horizontalCentered', 'verticalCentered'):
            setting = print_options.get(name)
            if setting is not None:
                setattr(ws.page_setup, name, setting)

    page_margins = root.find(qualified('pageMargins'))
    if page_margins is not None:
        for side in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            size = page_margins.get(side)
            if size is not None:
                setattr(ws.page_margins, side, float(size))

    page_setup = root.find(qualified('pageSetup'))
    if page_setup is not None:
        # every XML attribute is stored under the same name on page_setup
        for name in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            setting = page_setup.get(name)
            if setting is not None:
                setattr(ws.page_setup, name, setting)

    header_footer = root.find(qualified('headerFooter'))
    if header_footer is not None:
        odd_header = header_footer.find(qualified('oddHeader'))
        if odd_header is not None and odd_header.text is not None:
            ws.header_footer.setHeader(odd_header.text)
        odd_footer = header_footer.find(qualified('oddFooter'))
        if odd_footer is not None and odd_footer.text is not None:
            ws.header_footer.setFooter(odd_footer.text)
Exemplo n.º 15
0
def fast_parse(ws, xml_source, string_table, style_table):
    """Populate worksheet ``ws`` from the worksheet XML in ``xml_source``.

    The XML is read twice: once as a full tree (``fromstring``) for merged
    cells, column and row dimensions, print options, page margins, page
    setup and header/footer, and once as a stream (``iterparse``) for the
    cells accepted by ``filter_cells``.

    For each cell the ``s`` attribute, if present, is resolved through
    ``style_table`` and stored in ``ws._styles``.  A cell is written only
    when it has a ``v`` child:

    * with formula text, the cell receives ``"=" + formula``;
    * otherwise it receives the ``v`` text, translated through
      ``string_table`` when the cell type equals ``Cell.TYPE_STRING``.
      When ``ws.parent._guess_types`` is false the value is stored with
      ``set_explicit_value`` and the XML data type instead of plain
      assignment.

    :param ws: worksheet object receiving the parsed content
    :param xml_source: worksheet XML, passed to ``fromstring`` and to
        ``_get_xml_iter``
    :param string_table: mapping from shared-string index to string
    :param style_table: mapping from style index to style
    """
    ns = '{%s}' % SHEET_MAIN_NS

    root = fromstring(xml_source)
    guess_types = ws.parent._guess_types

    merge_cells = root.find(ns + 'mergeCells')
    if merge_cells is not None:
        for merge_cell in merge_cells.findall(ns + 'mergeCell'):
            ws.merge_cells(merge_cell.get('ref'))

    source = _get_xml_iter(xml_source)

    it = iterparse(source)

    value_tag = ns + 'v'
    formula_tag = ns + 'f'

    for event, element in filter(filter_cells, it):

        value = element.findtext(value_tag)
        formula = element.findtext(formula_tag)

        coordinate = element.get('r')
        style_id = element.get('s')
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        if value is not None:
            data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                value = string_table.get(int(value))
            if formula is not None:
                value = "=" + str(formula)
            if not guess_types and not formula:
                ws.cell(coordinate).set_explicit_value(value=value,
                                                       data_type=data_type)
            else:
                ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find(ns + 'cols')
    if cols is not None:
        for col in cols.findall(ns + 'col'):
            first = int(col.get('min')) if col.get('min') else 1
            last = int(col.get('max')) if col.get('max') else 1
            for col_id in range(first, last + 1):
                column = get_column_letter(col_id)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if col.get('width') is not None:
                    dimension.width = float(col.get('width'))
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if col.get('outlineLevel') is not None:
                    dimension.outline_level = int(col.get('outlineLevel'))
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if col.get('style') is not None:
                    dimension.style_index = col.get('style')

    sheet_data = root.find(ns + 'sheetData')
    if sheet_data is not None:
        for row in sheet_data.findall(ns + 'row'):
            row_id = int(row.get('r'))
            if row_id not in ws.row_dimensions:
                ws.row_dimensions[row_id] = RowDimension(row_id)
            if row.get('ht') is not None:
                ws.row_dimensions[row_id].height = float(row.get('ht'))

    print_options = root.find(ns + 'printOptions')
    if print_options is not None:
        for name in ('horizontalCentered', 'verticalCentered'):
            setting = print_options.get(name)
            if setting is not None:
                setattr(ws.page_setup, name, setting)

    page_margins = root.find(ns + 'pageMargins')
    if page_margins is not None:
        for side in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            size = page_margins.get(side)
            if size is not None:
                setattr(ws.page_margins, side, float(size))

    page_setup = root.find(ns + 'pageSetup')
    if page_setup is not None:
        # every XML attribute is stored under the same name on page_setup
        for name in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            setting = page_setup.get(name)
            if setting is not None:
                setattr(ws.page_setup, name, setting)

    header_footer = root.find(ns + 'headerFooter')
    if header_footer is not None:
        odd_header = header_footer.find(ns + 'oddHeader')
        if odd_header is not None and odd_header.text is not None:
            ws.header_footer.setHeader(odd_header.text)
        odd_footer = header_footer.find(ns + 'oddFooter')
        if odd_footer is not None and odd_footer.text is not None:
            ws.header_footer.setFooter(odd_footer.text)
Exemplo n.º 16
0
def fast_parse(ws, xml_source, string_table, style_table, color_index=None):
    """Populate worksheet ``ws`` from the sheet XML in ``xml_source``.

    The XML is read twice: once as a full tree (for merged cells, column and
    row dimensions, print/page settings, header/footer and conditional
    formatting) and once incrementally via ``iterparse`` for the cells.

    :param ws: worksheet object that receives the parsed content
    :param xml_source: sheet XML; handed to ``fromstring`` and
        ``_get_xml_iter``
    :param string_table: lookup queried with ``.get(int)`` to resolve the
        value of string-typed cells
    :param style_table: lookup queried with ``.get(int)`` to resolve cell and
        column style ids
    :param color_index: optional sequence indexed by the ``indexed``
        attribute of conditional-formatting colours; ignored when falsy
    :returns: ``None`` -- ``ws`` is modified in place
    """
    root = fromstring(xml_source)
    guess_types = ws.parent._guess_types

    mergeCells = root.find('{%s}mergeCells' % SHEET_MAIN_NS)
    if mergeCells is not None:
        for mergeCell in mergeCells.findall('{%s}mergeCell' % SHEET_MAIN_NS):
            ws.merge_cells(mergeCell.get('ref'))

    source = _get_xml_iter(xml_source)

    it = iterparse(source)

    for _event, element in filter(filter_cells, it):

        value = element.findtext('{%s}v' % SHEET_MAIN_NS)
        formula = element.find('{%s}f' % SHEET_MAIN_NS)

        coordinate = element.get('r')
        style_id = element.get('s')
        # The style is recorded even when the cell carries no value.
        if style_id is not None:
            ws._styles[coordinate] = style_table.get(int(style_id))

        if value is not None:
            data_type = element.get('t', 'n')
            if data_type == Cell.TYPE_STRING:
                value = string_table.get(int(value))
            if formula is not None:
                # A formula replaces the stored value with "=<formula text>".
                if formula.text:
                    value = "=" + str(formula.text)
                else:
                    value = "="
                formula_type = formula.get('t')
                if formula_type:
                    ws.formula_attributes[coordinate] = {'t': formula_type}
                    if formula.get('si'):  # Shared group index for shared formulas
                        ws.formula_attributes[coordinate]['si'] = formula.get('si')
                    if formula.get('ref'):  # Range for shared formulas
                        ws.formula_attributes[coordinate]['ref'] = formula.get('ref')
            if not guess_types and formula is None:
                ws.cell(coordinate).set_explicit_value(value=value, data_type=data_type)
            else:
                ws.cell(coordinate).value = value

        # to avoid memory exhaustion, clear the item after use
        element.clear()

    cols = root.find('{%s}cols' % SHEET_MAIN_NS)
    if cols is not None:
        for col in cols.findall('{%s}col' % SHEET_MAIN_NS):
            # Named first_col/last_col so the min()/max() builtins stay usable.
            first_col = int(col.get('min')) if col.get('min') else 1
            last_col = int(col.get('max')) if col.get('max') else 1
            # Ignore ranges that go up to the max column 16384.  Columns need to be extended to handle
            # ranges without creating an entry for every single one.
            if last_col == 16384:
                continue
            for colId in range(first_col, last_col + 1):
                column = get_column_letter(colId)
                if column not in ws.column_dimensions:
                    ws.column_dimensions[column] = ColumnDimension(column)
                dimension = ws.column_dimensions[column]
                if col.get('width') is not None:
                    dimension.width = float(col.get('width'))
                if col.get('bestFit') == '1':
                    dimension.auto_size = True
                if col.get('hidden') == '1':
                    dimension.visible = False
                if col.get('outlineLevel') is not None:
                    dimension.outline_level = int(col.get('outlineLevel'))
                if col.get('collapsed') == '1':
                    dimension.collapsed = True
                if col.get('style') is not None:
                    dimension.style_index = style_table.get(int(col.get('style')))

    sheetData = root.find('{%s}sheetData' % SHEET_MAIN_NS)
    if sheetData is not None:
        for row in sheetData.findall('{%s}row' % SHEET_MAIN_NS):
            rowId = int(row.get('r'))
            if rowId not in ws.row_dimensions:
                ws.row_dimensions[rowId] = RowDimension(rowId)
            if row.get('ht') is not None:
                ws.row_dimensions[rowId].height = float(row.get('ht'))

    printOptions = root.find('{%s}printOptions' % SHEET_MAIN_NS)
    if printOptions is not None:
        # Print options are stored on page_setup, as raw attribute strings.
        for attr in ('horizontalCentered', 'verticalCentered'):
            if printOptions.get(attr) is not None:
                setattr(ws.page_setup, attr, printOptions.get(attr))

    pageMargins = root.find('{%s}pageMargins' % SHEET_MAIN_NS)
    if pageMargins is not None:
        for attr in ('left', 'right', 'top', 'bottom', 'header', 'footer'):
            if pageMargins.get(attr) is not None:
                setattr(ws.page_margins, attr, float(pageMargins.get(attr)))

    pageSetup = root.find('{%s}pageSetup' % SHEET_MAIN_NS)
    if pageSetup is not None:
        # Every attribute is copied verbatim to the page_setup field of the
        # same name.  'scale' was previously written to page_setup.top by
        # mistake, which dropped the scale and set an unrelated attribute.
        for attr in ('orientation', 'paperSize', 'scale', 'fitToPage',
                     'fitToHeight', 'fitToWidth', 'firstPageNumber',
                     'useFirstPageNumber'):
            if pageSetup.get(attr) is not None:
                setattr(ws.page_setup, attr, pageSetup.get(attr))

    headerFooter = root.find('{%s}headerFooter' % SHEET_MAIN_NS)
    if headerFooter is not None:
        oddHeader = headerFooter.find('{%s}oddHeader' % SHEET_MAIN_NS)
        if oddHeader is not None and oddHeader.text is not None:
            ws.header_footer.setHeader(oddHeader.text)
        oddFooter = headerFooter.find('{%s}oddFooter' % SHEET_MAIN_NS)
        if oddFooter is not None and oddFooter.text is not None:
            ws.header_footer.setFooter(oddFooter.text)

    # Conditional formatting is collected as {sqref: [rule dict, ...]}.
    rules = {}
    for cf in root.findall('{%s}conditionalFormatting' % SHEET_MAIN_NS):
        if not cf.get('sqref'):
            # Potentially flag - this attribute should always be present.
            continue
        range_string = cf.get('sqref')
        rules[range_string] = []
        for cfRule in cf.findall('{%s}cfRule' % SHEET_MAIN_NS):
            if not cfRule.get('type') or cfRule.get('type') == 'dataBar':
                # dataBar conditional formatting isn't supported, as it relies on the complex <extLst> tag
                continue
            rule = {'type': cfRule.get('type')}
            for attr in ConditionalFormatting.rule_attributes:
                if cfRule.get(attr) is not None:
                    rule[attr] = cfRule.get(attr)

            # The 'formula' key is only present when the rule has formulas.
            for f in cfRule.findall('{%s}formula' % SHEET_MAIN_NS):
                if 'formula' not in rule:
                    rule['formula'] = []
                rule['formula'].append(f.text)

            colorScale = cfRule.find('{%s}colorScale' % SHEET_MAIN_NS)
            if colorScale is not None:
                rule['colorScale'] = {'cfvo': [], 'color': []}
                for node in colorScale.findall('{%s}cfvo' % SHEET_MAIN_NS):
                    cfvo = {}
                    for key in ('type', 'val'):
                        if node.get(key) is not None:
                            cfvo[key] = node.get(key)
                    rule['colorScale']['cfvo'].append(cfvo)
                for color in colorScale.findall('{%s}color' % SHEET_MAIN_NS):
                    c = Color(Color.BLACK)
                    # An in-range indexed colour is applied first; a theme or
                    # rgb attribute on the same node then overrides it.
                    if color_index and color.get('indexed') is not None and 0 <= int(color.get('indexed')) < len(color_index):
                        c.index = color_index[int(color.get('indexed'))]
                    if color.get('theme') is not None:
                        if color.get('tint') is not None:
                            c.index = 'theme:%s:%s' % (color.get('theme'), color.get('tint'))
                        else:
                            c.index = 'theme:%s:' % color.get('theme')  # prefix color with theme
                    elif color.get('rgb'):
                        c.index = color.get('rgb')
                    rule['colorScale']['color'].append(c)

            iconSet = cfRule.find('{%s}iconSet' % SHEET_MAIN_NS)
            if iconSet is not None:
                rule['iconSet'] = {'cfvo': []}
                for iconAttr in ConditionalFormatting.icon_attributes:
                    if iconSet.get(iconAttr) is not None:
                        rule['iconSet'][iconAttr] = iconSet.get(iconAttr)
                for node in iconSet.findall('{%s}cfvo' % SHEET_MAIN_NS):
                    cfvo = {}
                    for key in ('type', 'val'):
                        if node.get(key) is not None:
                            cfvo[key] = node.get(key)
                    rule['iconSet']['cfvo'].append(cfvo)

            rules[range_string].append(rule)
    if rules:
        ws.conditional_formatting.setRules(rules)