示例#1
0
    def parse(self):
        """
        Walk the worksheet XML and route each recognised element to its handler.

        Elements whose tag is a key of the dispatch table are passed to the
        matching method and then cleared. Once the stream is exhausted, any
        rules found in ``conditional_formatting.parse_rules`` are applied in
        a single ``update`` call.
        """
        dispatcher = {
            '{%s}mergeCells' % SHEET_MAIN_NS: self.parse_merge,
            '{%s}col' % SHEET_MAIN_NS: self.parse_column_dimensions,
            '{%s}row' % SHEET_MAIN_NS: self.parse_row_dimensions,
            '{%s}printOptions' % SHEET_MAIN_NS: self.parse_print_options,
            '{%s}pageMargins' % SHEET_MAIN_NS: self.parse_margins,
            '{%s}pageSetup' % SHEET_MAIN_NS: self.parse_page_setup,
            '{%s}headerFooter' % SHEET_MAIN_NS: self.parse_header_footer,
            '{%s}conditionalFormatting' % SHEET_MAIN_NS: self.parser_conditional_formatting,
            '{%s}autoFilter' % SHEET_MAIN_NS: self.parse_auto_filter,
        }
        tags = dispatcher.keys()
        # Open the stream only once, with the tag filter already known.
        stream = _get_xml_iter(self.source)
        it = iterparse(stream, tag=tags)

        for _event, element in it:
            handler = dispatcher.get(element.tag)
            if handler is not None:
                handler(element)
                element.clear()

        # Handle parsed conditional formatting rules together.
        formatting = self.ws.conditional_formatting
        if len(formatting.parse_rules):
            formatting.update(formatting.parse_rules)
示例#2
0
    def parse(self):
        """
        Dispatch worksheet XML elements to their ``parse_*`` handlers.

        Each element whose tag appears in the handler table is processed and
        cleared; afterwards the conditional formatting rules found in
        ``parse_rules`` are applied together.
        """
        ns = SHEET_MAIN_NS
        handlers = {
            "{%s}mergeCells" % ns: self.parse_merge,
            "{%s}col" % ns: self.parse_column_dimensions,
            "{%s}row" % ns: self.parse_row_dimensions,
            "{%s}printOptions" % ns: self.parse_print_options,
            "{%s}pageMargins" % ns: self.parse_margins,
            "{%s}pageSetup" % ns: self.parse_page_setup,
            "{%s}headerFooter" % ns: self.parse_header_footer,
            "{%s}conditionalFormatting" % ns: self.parser_conditional_formatting,
            "{%s}autoFilter" % ns: self.parse_auto_filter,
            "{%s}sheetProtection" % ns: self.parse_sheet_protection,
            "{%s}dataValidations" % ns: self.parse_data_validation,
            "{%s}sheetPr" % ns: self.parse_properties,
            "{%s}legacyDrawing" % ns: self.parse_legacy_drawing,
        }
        events = iterparse(_get_xml_iter(self.source), tag=handlers.keys())

        for _, node in events:
            handle = handlers.get(node.tag)
            if handle is None:
                continue
            handle(node)
            node.clear()

        # Conditional formatting rules are applied in one batch at the end.
        formatting = self.ws.conditional_formatting
        if len(formatting.parse_rules) > 0:
            formatting.update(formatting.parse_rules)
    def get_squared_range(self, min_col, min_row, max_col, max_row):
        """
        Yield one row of cells at a time for the requested range.

        Row numbers skipped in the source XML are emitted as placeholder
        rows (``EMPTY_CELL`` repeated, or an empty list when *max_col* is
        ``None``) so that no row index is left out.
        """
        if max_col is None:
            blank_row = []
        else:
            blank_row = tuple(EMPTY_CELL for _ in range(min_col, max_col + 1))
        next_row = min_row

        rows = iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True)
        for _event, node in rows:
            if node.tag != ROW_TAG:
                continue

            # fall back to the running counter when the row has no "r"
            row_id = int(node.get("r", next_row))

            # past the requested range: stop reading
            if max_row is not None and row_id > max_row:
                break

            # pad every row index skipped since the previous row
            while next_row < row_id:
                next_row += 1
                yield blank_row

            # emit the row itself once it is inside the range
            if min_row <= row_id:
                yield tuple(self._get_row(node, min_col, max_col, row_counter=next_row))
                next_row += 1

            node.clear()
示例#4
0
    def _cells_by_row(self, min_col, min_row, max_col, max_row):
        """
        Yield the rows from *min_row* to *max_row*, one sequence of cells each.

        The source worksheet file may have columns or rows missing.
        Missing rows are reported as a placeholder row (``EMPTY_CELL``
        repeated, or an empty list when *max_col* is ``None``).

        On first use the row elements are read from ``self.xml_source`` and
        cached in ``self._iter_parse`` keyed by their ``r`` attribute
        (``-1`` when the attribute is absent). When *max_row* is ``None``
        iteration runs up to the highest cached row number.
        """
        if max_col is not None:
            empty_row = tuple(EMPTY_CELL
                              for column in range(min_col, max_col + 1))
        else:
            empty_row = []

        if self._iter_parse is None:
            p = iterparse(self.xml_source,
                          tag=[ROW_TAG],
                          remove_blank_text=True)
            self._iter_parse = {
                int(element.get("r", -1)): (_event, element)
                for _event, element in p if element.tag == ROW_TAG
            }

        # An open-ended range stops at the last row that was actually read.
        last_row = max_row
        if last_row is None:
            if self._iter_parse:
                last_row = max(self._iter_parse)
            else:
                last_row = min_row - 1

        for row_id in range(min_row, last_row + 1):
            result_obj = self._iter_parse.get(row_id)

            # some rows are missing
            if result_obj is None:
                yield empty_row
                # nothing to unpack for a missing row
                continue

            _event, element = result_obj
            yield tuple(
                self._get_row(element, min_col, max_col, row_counter=row_id))
示例#5
0
    def _cells_by_row(self, min_col, min_row, max_col, max_row):
        """
        Generate the rows of the requested range as tuples of cells.

        Gaps in the row numbering of the source XML are filled with a
        placeholder row so that consecutive row indices are all produced.
        """
        placeholder = (
            tuple(EMPTY_CELL for _ in range(min_col, max_col + 1))
            if max_col is not None
            else []
        )
        expected = min_row

        for _event, row_el in iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True):
            if row_el.tag != ROW_TAG:
                continue

            # rows without an "r" attribute take the expected index
            row_id = int(row_el.get("r", expected))

            if max_row is not None and row_id > max_row:
                # got all the rows we need
                break

            # some rows are missing: emit one placeholder per skipped index
            while expected < row_id:
                expected += 1
                yield placeholder

            # return cells from a row
            if min_row <= row_id:
                cells = self._get_row(row_el, min_col, max_col, row_counter=expected)
                yield tuple(cells)
                expected += 1

            row_el.clear()
示例#6
0
    def get_squared_range(self, min_col, min_row, max_col, max_row):
        """
        Generate the rows of the requested range, one tuple of cells per row.

        Row numbers skipped in the worksheet XML are filled in with a
        placeholder row before the next real row is produced.
        """
        if max_col is None:
            filler = []
        else:
            filler = tuple(EMPTY_CELL for _ in range(min_col, max_col + 1))
        expected = min_row

        for _event, node in iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True):
            if node.tag == ROW_TAG:
                row_id = int(node.get("r"))

                # got all the rows we need
                if max_row is not None and row_id > max_row:
                    break

                # some rows are missing
                while expected < row_id:
                    expected += 1
                    yield filler

                # return cells from a row
                if min_row <= row_id:
                    yield tuple(self._get_row(node, min_col, max_col))
                    expected += 1

            # sub-elements of rows are handled within a cell, so they are
            # left intact; everything else is released
            if node.tag not in (CELL_TAG, VALUE_TAG, FORMULA_TAG):
                node.clear()
示例#7
0
    def parse(self):
        """
        Feed each recognised worksheet XML element to its handler.

        Handled elements are cleared afterwards. When the stream ends, the
        worksheet's ``_current_row`` is set to its ``max_row``.
        """
        ns = SHEET_MAIN_NS
        handlers = {
            '{%s}mergeCells' % ns: self.parse_merge,
            '{%s}col' % ns: self.parse_column_dimensions,
            '{%s}row' % ns: self.parse_row_dimensions,
            '{%s}printOptions' % ns: self.parse_print_options,
            '{%s}pageMargins' % ns: self.parse_margins,
            '{%s}pageSetup' % ns: self.parse_page_setup,
            '{%s}headerFooter' % ns: self.parse_header_footer,
            '{%s}conditionalFormatting' % ns: self.parser_conditional_formatting,
            '{%s}autoFilter' % ns: self.parse_auto_filter,
            '{%s}sheetProtection' % ns: self.parse_sheet_protection,
            '{%s}dataValidations' % ns: self.parse_data_validation,
            '{%s}sheetPr' % ns: self.parse_properties,
            '{%s}legacyDrawing' % ns: self.parse_legacy_drawing,
            '{%s}sheetViews' % ns: self.parse_sheet_views,
        }
        wanted = handlers.keys()
        source_stream = _get_xml_iter(self.source)

        for _, node in iterparse(source_stream, tag=wanted):
            handle = handlers.get(node.tag)
            if handle is None:
                continue
            handle(node)
            node.clear()

        worksheet = self.ws
        worksheet._current_row = worksheet.max_row
示例#8
0
 def get_cells(self, min_row, min_col, max_row, max_col):
     """
     Yield a ``ReadOnlyCell`` for every cell inside the requested bounds.

     Reading stops at the first row numbered beyond *max_row*; within a
     row it stops at the first cell whose column is beyond *max_col*.
     """
     rows = iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True)
     for _event, node in rows:
         if node.tag == ROW_TAG:
             row_number = int(node.get("r"))
             if max_row is not None and row_number > max_row:
                 break
             if min_row <= row_number:
                 for cell in safe_iterator(node, CELL_TAG):
                     # the cell reference supplies column letters and row
                     column_str, cell_row = coordinate_from_string(cell.get('r'))
                     column = column_index_from_string(column_str)
                     if max_col is not None and column > max_col:
                         break
                     if not min_col <= column:
                         continue
                     data_type = cell.get('t', 'n')
                     style_id = cell.get('s')
                     formula = cell.findtext(FORMULA_TAG)
                     value = cell.findtext(VALUE_TAG)
                     # a formula replaces the value unless data_only is set
                     if not (formula is None or self.parent.data_only):
                         data_type = Cell.TYPE_FORMULA
                         value = "=%s" % formula
                     yield ReadOnlyCell(cell_row, column_str, value,
                                        data_type, style_id)
         # sub-elements of rows are left intact; anything else is released
         if node.tag not in (CELL_TAG, VALUE_TAG, FORMULA_TAG):
             node.clear()
示例#9
0
def read_dimension(source):
    """
    Read the worksheet boundaries from its ``dimension`` element.

    Returns ``(min_col, min_row, max_col, max_row)`` with the columns as
    captured by ``ABSOLUTE_RE`` and the rows converted to ``int``. When the
    reference has no second corner, the first corner is used for both.

    Returns ``None`` when *source* has an ``encode`` attribute (it is then
    not parsed), when the ``dimension`` element has no ``ref`` attribute or
    one that ``ABSOLUTE_RE`` does not match, or when ``sheetData`` is
    reached first.
    """
    if hasattr(source, "encode"):
        return
    DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS
    DATA_TAG = '{%s}sheetData' % SHEET_MAIN_NS
    it = iterparse(source, tag=[DIMENSION_TAG, DATA_TAG])
    for _event, element in it:
        if element.tag == DIMENSION_TAG:
            dim = element.get("ref")
            if dim is None:
                # no reference to parse; treat like an unusable one
                return
            m = ABSOLUTE_RE.match(dim.upper())
            if m is None:
                return
            min_col, min_row, _sep, max_col, max_row = m.groups()
            min_row = int(min_row)
            if max_col is None or max_row is None:
                # single-cell reference: both corners coincide
                max_col = min_col
                max_row = min_row
            else:
                max_row = int(max_row)
            return min_col, min_row, max_col, max_row

        elif element.tag == DATA_TAG:
            # Dimensions missing
            break
        element.clear()
示例#10
0
    def parse(self):
        """
        Stream the worksheet XML and invoke the handler registered per tag.

        Each handled element is cleared once its handler returns. After the
        last element, ``self.ws._current_row`` is set to ``self.ws.max_row``.
        """
        ns = SHEET_MAIN_NS
        dispatcher = {
            '{%s}mergeCells' % ns: self.parse_merge,
            '{%s}col' % ns: self.parse_column_dimensions,
            '{%s}row' % ns: self.parse_row_dimensions,
            '{%s}printOptions' % ns: self.parse_print_options,
            '{%s}pageMargins' % ns: self.parse_margins,
            '{%s}pageSetup' % ns: self.parse_page_setup,
            '{%s}headerFooter' % ns: self.parse_header_footer,
            '{%s}conditionalFormatting' % ns: self.parser_conditional_formatting,
            '{%s}autoFilter' % ns: self.parse_auto_filter,
            '{%s}sheetProtection' % ns: self.parse_sheet_protection,
            '{%s}dataValidations' % ns: self.parse_data_validation,
            '{%s}sheetPr' % ns: self.parse_properties,
            '{%s}legacyDrawing' % ns: self.parse_legacy_drawing,
            '{%s}sheetViews' % ns: self.parse_sheet_views,
            '{%s}extLst' % ns: self.parse_extensions,
        }
        events = iterparse(_get_xml_iter(self.source), tag=dispatcher.keys())

        for _, node in events:
            tag = node.tag
            if tag not in dispatcher:
                continue
            dispatcher[tag](node)
            node.clear()

        self.ws._current_row = self.ws.max_row
    def get_squared_range(self, min_col, min_row, max_col, max_row):
        """
        Yield the rows of the requested range, one tuple of cells per row.

        The source worksheet file may have columns or rows missing.
        Missing rows are reported as a placeholder row: ``EMPTY_CELL``
        repeated for each requested column, or an empty list when
        *max_col* is ``None``. Reading stops at the first row numbered
        beyond *max_row* (no limit when *max_row* is ``None``).
        """
        if max_col is not None:
            empty_row = tuple(EMPTY_CELL for column in range(min_col, max_col + 1))
        else:
            empty_row = []
        row_counter = min_row

        p = iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True)
        for _event, element in p:
            if element.tag == ROW_TAG:
                row_id = int(element.get("r"))

                # got all the rows we need
                if max_row is not None and row_id > max_row:
                    break

                # some rows are missing: emit one placeholder per skipped
                # index and leave the counter on the current row, so the
                # next row is not mistaken for another gap
                while row_counter < row_id:
                    row_counter += 1
                    yield empty_row

                # return cells from a row
                if min_row <= row_id:
                    yield tuple(self._get_row(element, min_col, max_col))
                    row_counter += 1

            if element.tag in (CELL_TAG, VALUE_TAG, FORMULA_TAG):
                # sub-elements of rows should be skipped as handled within a cell
                continue
            element.clear()
示例#12
0
 def get_cells(self, min_row, min_col, max_row, max_col):
     """
     Generate ``ReadOnlyCell`` objects for the cells within the given bounds.

     Rows numbered above *max_row* end the iteration; cells in a column
     above *max_col* end the scan of their row.
     """
     for _event, node in iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True):
         if node.tag == ROW_TAG:
             row_number = int(node.get("r"))
             if max_row is not None and row_number > max_row:
                 break
             if min_row <= row_number:
                 for cell in safe_iterator(node, CELL_TAG):
                     coord = cell.get('r')
                     # row and column both come from the cell reference
                     column_str, cell_row = coordinate_from_string(coord)
                     column = column_index_from_string(column_str)
                     if max_col is not None and column > max_col:
                         break
                     if not min_col <= column:
                         continue
                     style_id = cell.get('s')
                     data_type = cell.get('t', 'n')
                     formula = cell.findtext(FORMULA_TAG)
                     value = cell.findtext(VALUE_TAG)
                     # prefer the formula unless only data was requested
                     if not (formula is None or self.parent.data_only):
                         data_type = Cell.TYPE_FORMULA
                         value = "=%s" % formula
                     yield ReadOnlyCell(self, cell_row, column_str,
                                        value, data_type, style_id)
         # sub-elements of rows are kept; other elements are released
         if node.tag not in (CELL_TAG, VALUE_TAG, FORMULA_TAG):
             node.clear()
示例#13
0
def read_dimension(source):
    """
    Read the worksheet boundaries from its ``dimension`` element.

    Returns ``(min_col, min_row, max_col, max_row)`` where the columns are
    passed through ``column_index_from_string`` and the rows are ``int``.
    When the reference has no second corner, the first corner is used for
    both.

    Returns ``None`` when *source* has an ``encode`` attribute (it is then
    not parsed), when the ``dimension`` element has no ``ref`` attribute or
    one that ``ABSOLUTE_RE`` does not match, or when ``sheetData`` is
    reached first.
    """
    if hasattr(source, "encode"):
        return
    DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS
    DATA_TAG = '{%s}sheetData' % SHEET_MAIN_NS
    it = iterparse(source, tag=[DIMENSION_TAG, DATA_TAG])
    for _event, element in it:
        if element.tag == DIMENSION_TAG:
            dim = element.get("ref")
            if dim is None:
                # no reference to parse; treat like an unusable one
                return
            m = ABSOLUTE_RE.match(dim.upper())
            if m is None:
                return
            min_col, min_row, _sep, max_col, max_row = m.groups()
            min_row = int(min_row)
            if max_col is None or max_row is None:
                # single-cell reference: both corners coincide
                max_col = min_col
                max_row = min_row
            else:
                max_row = int(max_row)
            return (
                column_index_from_string(min_col),
                min_row,
                column_index_from_string(max_col),
                max_row
                )

        elif element.tag == DATA_TAG:
            # Dimensions missing
            break
        element.clear()
示例#14
0
def read_string_table(xml_source):
    """Collect every shared string of the table into an ``IndexedList``."""
    si_tag = '{%s}si' % SHEET_MAIN_NS
    collected = []

    for _, item in iterparse(_get_xml_iter(xml_source)):
        if item.tag != si_tag:
            continue

        content = Text.from_tree(item).content
        # drop the 'x005F_' marker wherever it occurs in the text
        collected.append(content.replace('x005F_', ''))
        item.clear()

    return IndexedList(collected)
示例#15
0
    def parse_dimensions(self):
        """
        Return the boundaries from the worksheet's dimension element.

        Yields ``None`` (implicitly) when the sheet data element is reached
        first or the source ends without a dimension element.
        """
        for _event, node in iterparse(self.source):
            tag = node.tag
            if tag == DIMENSION_TAG:
                return SheetDimension.from_tree(node).boundaries
            if tag == DATA_TAG:
                # Dimensions missing
                break
            node.clear()
示例#16
0
def read_string_table(xml_source):
    """Return the shared strings of the table as a list, in document order."""
    si_tag = '{%s}si' % SHEET_MAIN_NS
    found = []

    for _, item in iterparse(xml_source):
        if item.tag != si_tag:
            continue
        # strip the 'x005F_' marker from the extracted text
        cleaned = Text.from_tree(item).content.replace('x005F_', '')
        item.clear()
        found.append(cleaned)

    return found
示例#17
0
    def _get_cells(self, min_row, min_col, max_row, max_col):
        """
        Yield ``(row, cells)`` pairs for the rows from *min_row* onwards.

        ``row`` is the integer ``r`` attribute of the row element and
        ``cells`` the tuple built from ``self._get_row``. Reading stops at
        the first row numbered beyond *max_row* (no limit when it is
        ``None``).
        """
        p = iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True)
        for _event, element in p:
            if element.tag == ROW_TAG:
                row = int(element.get("r"))
                if max_row is not None and row > max_row:
                    break
                if min_row <= row:
                    yield row, tuple(self._get_row(element, min_col, max_col))

            if element.tag in (CELL_TAG, VALUE_TAG, FORMULA_TAG):
                # sub-elements of rows should be skipped
                continue
            element.clear()
示例#18
0
    def _get_cells(self, min_row, min_col, max_row, max_col):
        """
        Generate ``(row, cells)`` for each row element within the row bounds.

        Rows numbered below *min_row* are skipped; the first row numbered
        above *max_row* ends the iteration (``None`` means no upper bound).
        ``cells`` is the tuple of whatever ``self._get_row`` produces for
        the requested columns.
        """
        p = iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True)
        for _event, element in p:
            if element.tag == ROW_TAG:
                row = int(element.get("r"))
                if max_row is not None and row > max_row:
                    break
                if min_row <= row:
                    yield row, tuple(self._get_row(element, min_col, max_col))

            if element.tag in (CELL_TAG, VALUE_TAG, FORMULA_TAG):
                # sub-elements of rows should be skipped
                continue
            element.clear()
示例#19
0
文件: strings.py 项目: ACMH1/ExcelDB
def read_string_table(xml_source):
    """Build an ``IndexedList`` of all shared strings found in the table."""
    string_tag = '{%s}si' % SHEET_MAIN_NS
    texts = []
    events = iterparse(_get_xml_iter(xml_source))

    for _, element in events:
        if element.tag == string_tag:
            raw = Text.from_tree(element).content
            # remove the 'x005F_' marker before storing the string
            texts.append(raw.replace('x005F_', ''))
            element.clear()

    return IndexedList(texts)
示例#20
0
    def parse(self):
        """
        Read the worksheet XML and update ``self.ws`` from the elements found.

        Tags in the dispatch table are passed to a handler method; tags in
        the property table are built with the mapped class's ``from_tree``
        and stored on the worksheet under the mapped attribute name.
        Processed elements are cleared, and ``_current_row`` is finally set
        to the worksheet's ``max_row``.
        """
        ns = SHEET_MAIN_NS
        dispatcher = {
            '{%s}mergeCells' % ns: self.parse_merge,
            '{%s}col' % ns: self.parse_column_dimensions,
            '{%s}row' % ns: self.parse_row,
            '{%s}conditionalFormatting' % ns: self.parser_conditional_formatting,
            '{%s}legacyDrawing' % ns: self.parse_legacy_drawing,
            '{%s}sheetProtection' % ns: self.parse_sheet_protection,
            '{%s}extLst' % ns: self.parse_extensions,
            '{%s}hyperlink' % ns: self.parse_hyperlinks,
            '{%s}tableParts' % ns: self.parse_tables,
        }

        # tag -> (worksheet attribute, class providing from_tree)
        # The sortState mapping ('sort_state', SortState) is not enabled.
        properties = {
            '{%s}printOptions' % ns: ('print_options', PrintOptions),
            '{%s}pageMargins' % ns: ('page_margins', PageMargins),
            '{%s}pageSetup' % ns: ('page_setup', PrintPageSetup),
            '{%s}headerFooter' % ns: ('HeaderFooter', HeaderFooter),
            '{%s}autoFilter' % ns: ('auto_filter', AutoFilter),
            '{%s}dataValidations' % ns: ('data_validations', DataValidationList),
            '{%s}sheetPr' % ns: ('sheet_properties', WorksheetProperties),
            '{%s}sheetViews' % ns: ('views', SheetViewList),
            '{%s}sheetFormatPr' % ns: ('sheet_format', SheetFormatProperties),
            '{%s}rowBreaks' % ns: ('page_breaks', PageBreak),
        }

        # NOTE(review): only the dispatcher keys are passed as the tag
        # filter; if iterparse honours it, the properties branch below is
        # never reached -- confirm against the iterparse in use.
        events = iterparse(_get_xml_iter(self.source), tag=dispatcher.keys())

        for _, node in events:
            tag = node.tag
            handler = dispatcher.get(tag)
            if handler is not None:
                handler(node)
            elif tag in properties:
                attr_name, factory = properties[tag]
                setattr(self.ws, attr_name, factory.from_tree(node))
            else:
                continue
            node.clear()

        self.ws._current_row = self.ws.max_row
示例#21
0
文件: _reader.py 项目: chenlei0x/yc
    def parse(self):
        """
        Generator yielding the result of ``parse_row`` for each row element.

        While iterating, elements listed in the dispatch table are passed
        to their handler, and elements listed in the property table are
        built with the mapped class's ``from_tree`` and stored on ``self``
        under the mapped attribute name. Every processed element is cleared.
        """
        dispatcher = {
            COL_TAG: self.parse_column_dimensions,
            PROT_TAG: self.parse_sheet_protection,
            EXT_TAG: self.parse_extensions,
            CF_TAG: self.parse_formatting,
            LEGACY_TAG: self.parse_legacy,
            ROW_BREAK_TAG: self.parse_row_breaks,
            COL_BREAK_TAG: self.parse_col_breaks,
            CUSTOM_VIEWS_TAG: self.parse_custom_views,
        }

        # tag -> (attribute name on self, class providing from_tree)
        properties = {
            PRINT_TAG: ('print_options', PrintOptions),
            MARGINS_TAG: ('page_margins', PageMargins),
            PAGE_TAG: ('page_setup', PrintPageSetup),
            HEADER_TAG: ('HeaderFooter', HeaderFooter),
            FILTER_TAG: ('auto_filter', AutoFilter),
            VALIDATION_TAG: ('data_validations', DataValidationList),
            PROPERTIES_TAG: ('sheet_properties', WorksheetProperties),
            VIEWS_TAG: ('views', SheetViewList),
            FORMAT_TAG: ('sheet_format', SheetFormatProperties),
            SCENARIOS_TAG: ('scenarios', ScenarioList),
            TABLE_TAG: ('tables', TablePartList),
            HYPERLINK_TAG: ('hyperlinks', HyperlinkList),
            MERGE_TAG: ('merged_cells', MergeCells),
        }

        # add a finaliser to close the source when this becomes possible
        events = iterparse(self.source)

        for _, node in events:
            tag = node.tag
            handler = dispatcher.get(tag)
            if handler is not None:
                handler(node)
                node.clear()
            elif tag in properties:
                attr_name, factory = properties[tag]
                setattr(self, attr_name, factory.from_tree(node))
                node.clear()
            elif tag == ROW_TAG:
                # release the element before handing the row to the caller
                parsed_row = self.parse_row(node)
                node.clear()
                yield parsed_row
def read_dimension(source):
    """
    Return the boundaries declared by the worksheet's ``dimension`` element.

    The result is ``SheetDimension.from_tree(element).boundaries``.
    Returns ``None`` when *source* has an ``encode`` attribute (it is then
    not parsed), or when no ``dimension`` element is found before
    ``sheetData`` or the end of the source.
    """
    if hasattr(source, "encode"):
        return

    DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS
    DATA_TAG = '{%s}sheetData' % SHEET_MAIN_NS
    it = iterparse(source, tag=[DIMENSION_TAG, DATA_TAG])

    for _event, element in it:
        if element.tag == DIMENSION_TAG:
            dim = SheetDimension.from_tree(element)
            return dim.boundaries

        elif element.tag == DATA_TAG:
            # Dimensions missing
            break
        element.clear()
示例#23
0
def read_dimension(source):
    """
    Look up the worksheet boundaries from its ``dimension`` element.

    Sources that expose an ``encode`` attribute are not parsed and give
    ``None``. Otherwise the source is scanned for the ``dimension``
    element, whose ``SheetDimension`` boundaries are returned; reaching
    ``sheetData`` (or the end of the source) first gives ``None``.
    """
    if hasattr(source, "encode"):
        return

    DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS
    DATA_TAG = '{%s}sheetData' % SHEET_MAIN_NS
    it = iterparse(source, tag=[DIMENSION_TAG, DATA_TAG])

    for _event, element in it:
        if element.tag == DIMENSION_TAG:
            dim = SheetDimension.from_tree(element)
            return dim.boundaries

        elif element.tag == DATA_TAG:
            # Dimensions missing
            break
        element.clear()
示例#24
0
def read_dimension(source):
    """
    Read the worksheet boundaries from its ``dimension`` element.

    The ``ref`` attribute is split on ``':'`` into a start and a stop
    coordinate (a reference without ``':'`` is used for both), and each is
    passed through ``coordinate_from_string``. Returns
    ``(min_col, min_row, max_col, max_row)``.

    Returns ``None`` when the ``dimension`` element has no ``ref``
    attribute, or when no ``dimension`` element is found before
    ``sheetData`` or the end of the source.
    """
    DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS
    DATA_TAG = '{%s}sheetData' % SHEET_MAIN_NS
    it = iterparse(source, tag=[DIMENSION_TAG, DATA_TAG])
    for _event, element in it:
        if element.tag == DIMENSION_TAG:
            dim = element.get("ref")
            if dim is None:
                # no reference to split; report the dimensions as unknown
                return None
            if ':' in dim:
                start, stop = dim.split(':')
            else:
                start = stop = dim
            min_col, min_row = coordinate_from_string(start)
            max_col, max_row = coordinate_from_string(stop)
            return min_col, min_row, max_col, max_row
        elif element.tag == DATA_TAG:
            # Dimensions missing
            break
        element.clear()
示例#25
0
def read_dimension(source):
    """
    Return ``(min_col, min_row, max_col, max_row)`` for the worksheet.

    The values come from the ``ref`` attribute of the ``dimension``
    element: it is split on ``':'`` into two coordinates, or used for both
    corners when it has no ``':'``, and each coordinate is passed through
    ``coordinate_from_string``.

    ``None`` is returned when ``ref`` is absent, or when ``sheetData`` or
    the end of the source is reached without a ``dimension`` element.
    """
    DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS
    DATA_TAG = '{%s}sheetData' % SHEET_MAIN_NS
    it = iterparse(source, tag=[DIMENSION_TAG, DATA_TAG])
    for _event, element in it:
        if element.tag == DIMENSION_TAG:
            dim = element.get("ref")
            if dim is None:
                # no reference to split; report the dimensions as unknown
                return None
            if ':' in dim:
                start, stop = dim.split(':')
            else:
                start = stop = dim
            min_col, min_row = coordinate_from_string(start)
            max_col, max_row = coordinate_from_string(stop)
            return min_col, min_row, max_col, max_row
        elif element.tag == DATA_TAG:
            # Dimensions missing
            break
        element.clear()
示例#26
0
文件: worksheet.py 项目: cloudera/hue
    def parse(self):
        """
        Populate ``self.ws`` from the elements of the worksheet XML.

        Tags in the dispatch table go to a handler method; tags in the
        property table are built with the mapped class's ``from_tree`` and
        assigned to the mapped worksheet attribute. Processed elements are
        cleared, and ``_current_row`` is finally set to ``max_row``.
        """
        ns = SHEET_MAIN_NS
        dispatcher = {
            '{%s}mergeCells' % ns: self.parse_merge,
            '{%s}col' % ns: self.parse_column_dimensions,
            '{%s}row' % ns: self.parse_row,
            '{%s}conditionalFormatting' % ns: self.parser_conditional_formatting,
            '{%s}legacyDrawing' % ns: self.parse_legacy_drawing,
            '{%s}sheetProtection' % ns: self.parse_sheet_protection,
            '{%s}extLst' % ns: self.parse_extensions,
            '{%s}hyperlink' % ns: self.parse_hyperlinks,
            '{%s}tableParts' % ns: self.parse_tables,
        }

        # tag -> (worksheet attribute, class providing from_tree)
        # The sortState mapping ('sort_state', SortState) is not enabled.
        properties = {
            '{%s}printOptions' % ns: ('print_options', PrintOptions),
            '{%s}pageMargins' % ns: ('page_margins', PageMargins),
            '{%s}pageSetup' % ns: ('page_setup', PrintPageSetup),
            '{%s}headerFooter' % ns: ('HeaderFooter', HeaderFooter),
            '{%s}autoFilter' % ns: ('auto_filter', AutoFilter),
            '{%s}dataValidations' % ns: ('data_validations', DataValidationList),
            '{%s}sheetPr' % ns: ('sheet_properties', WorksheetProperties),
            '{%s}sheetViews' % ns: ('views', SheetViewList),
            '{%s}sheetFormatPr' % ns: ('sheet_format', SheetFormatProperties),
            '{%s}rowBreaks' % ns: ('page_breaks', PageBreak),
        }

        # NOTE(review): the dispatcher mapping itself is passed as the tag
        # filter; if iterparse honours it, property tags are never seen by
        # the loop below -- confirm against the iterparse in use.
        events = iterparse(_get_xml_iter(self.source), tag=dispatcher)

        for _, node in events:
            tag = node.tag
            handler = dispatcher.get(tag)
            if handler is not None:
                handler(node)
            elif tag in properties:
                attr_name, factory = properties[tag]
                setattr(self.ws, attr_name, factory.from_tree(node))
            else:
                continue
            node.clear()

        self.ws._current_row = self.ws.max_row