Пример #1
0
def get_text(rich_node):
    """Return the plain text held by the ``t`` child of *rich_node*.

    The ``t`` element is looked up in the ``SHEET_MAIN_NS`` namespace.
    Surrounding whitespace is stripped unless the element carries a
    ``space`` attribute (in ``XML_NS``) equal to ``preserve``.

    :param rich_node: element whose ``t`` child holds the text
    :return: the text as a unicode string; empty when there is no ``t``
        child or the child has no text
    """
    text_node = rich_node.find('{%s}t' % SHEET_MAIN_NS)
    if text_node is None:
        # ``find`` returns None when no ``t`` child exists; report that as
        # empty text instead of failing with AttributeError below.
        return unicode('')

    partial_text = text_node.text or unicode('')
    if text_node.get('{%s}space' % XML_NS) != 'preserve':
        partial_text = partial_text.strip()
    return unicode(partial_text)
Пример #2
0
def get_text(xmlns, rich_node):
    """Return the plain text held by the ``t`` child of *rich_node*.

    Surrounding whitespace is stripped unless the element carries a
    ``space`` attribute (in the ``NAMESPACES['xml']`` namespace) equal to
    ``preserve``.

    :param xmlns: namespace URI in which the ``t`` element is looked up
    :param rich_node: element whose ``t`` child holds the text
    :return: the text as a unicode string; empty when there is no ``t``
        child or the child has no text
    """
    text_node = rich_node.find(QName(xmlns, 't').text)
    if text_node is None:
        # ``find`` returns None when no ``t`` child exists; report that as
        # empty text instead of failing with AttributeError below.
        return unicode('')

    partial_text = text_node.text or unicode('')
    if text_node.get(QName(NAMESPACES['xml'], 'space').text) != 'preserve':
        partial_text = partial_text.strip()
    return unicode(partial_text)
Пример #3
0
def load_workbook(filename, use_iterators=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raises OpenModeError: when a file object was not opened in binary mode
    :raises InvalidFileException: when the archive cannot be opened as a
        zip file, or when loading the workbook raises :class:`KeyError`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.reader.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """

    try:
        # Python 2
        is_file_instance = isinstance(filename, file)
    except NameError:
        # Python 3
        from io import BufferedReader

        is_file_instance = isinstance(filename, BufferedReader)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if "b" not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, "r", ZIP_DEFLATED)
    except BadZipfile:
        # One repair attempt, then give up with the library's own error.
        try:
            repair_central_directory(filename)
            archive = ZipFile(filename, "r", ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    # From here on the archive is open; make sure it is closed again no
    # matter how loading ends.
    try:
        wb = Workbook()

        if use_iterators:
            wb._set_optimized_read()

        try:
            _load_workbook(wb, archive, filename, use_iterators)
        except KeyError:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    finally:
        archive.close()

    return wb
Пример #4
0
    def get_squared_range(self, min_col, min_row, max_col, max_row):
        """Yield the requested area row by row as tuples of cells.

        Cells come from ``self.get_cells`` and are grouped by their ``row``
        attribute.  Columns ``min_col`` up to, but not including,
        ``max_col`` are reported for every row; positions without a stored
        cell are filled from ``get_missing_cells``.  Row numbers skipped
        before a row that has data are yielded as rows made up entirely of
        such fillers.  Nothing is yielded after the last row with data.

        Stored cells are converted before being yielded: the number format
        is taken from the style table, and the raw value is turned into a
        shared string, bool, date, float or unicode string depending on the
        cell's data type.
        """
        columns = [get_column_letter(idx) for idx in xrange(min_col, max_col)]
        styles = self._style_table
        next_row = min_row

        grouped = groupby(
            self.get_cells(min_row, min_col, max_row, max_col),
            operator.attrgetter('row'))
        for row, row_cells in grouped:
            # Filler rows for row numbers the data skipped; the range is
            # empty when there is no gap.
            for absent_row in xrange(next_row, row):
                placeholders = get_missing_cells(absent_row, columns)
                yield tuple([placeholders[letter] for letter in columns])

            found = dict((c.column, c) for c in list(row_cells))
            absent = list(set(columns) - set(found.keys()))
            fillers = get_missing_cells(row, absent)

            output = []
            for letter in columns:
                if letter not in found:
                    output.append(fillers[letter])
                    continue

                cell = found[letter]
                if cell.style_id is not None:
                    fmt = styles[int(cell.style_id)].number_format.format_code
                    cell = cell._replace(number_format=fmt)  #pylint: disable-msg=W0212

                raw = cell.internal_value
                if raw is not None:
                    if cell.data_type in Cell.TYPE_STRING:
                        converted = unicode(self._string_table[int(raw)])
                        cell = cell._replace(internal_value=converted)  #pylint: disable-msg=W0212
                    elif cell.data_type == Cell.TYPE_BOOL:
                        cell = cell._replace(internal_value=(raw == '1'))
                    elif cell.is_date:
                        converted = self._shared_date.from_julian(float(raw))
                        cell = cell._replace(internal_value=converted)
                    elif cell.data_type == Cell.TYPE_NUMERIC:
                        cell = cell._replace(internal_value=float(raw))
                    elif cell.data_type in (Cell.TYPE_INLINE,
                                            Cell.TYPE_FORMULA_CACHE_STRING):
                        cell = cell._replace(internal_value=unicode(raw))
                output.append(cell)

            next_row = row + 1
            yield tuple(output)
Пример #5
0
def load_workbook(filename, use_iterators=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raises OpenModeError: when a file object was not opened in binary mode
    :raises InvalidFileException: when the archive cannot be opened as a
        zip file, or when loading the workbook raises :class:`KeyError`

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.reader.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """

    try:
        # Python 2
        is_file_instance = isinstance(filename, file)
    except NameError:
        # Python 3
        from io import BufferedReader
        is_file_instance = isinstance(filename, BufferedReader)

    if is_file_instance:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if 'b' not in filename.mode:
            raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # One repair attempt, then give up with the library's own error.
        try:
            repair_central_directory(filename)
            archive = ZipFile(filename, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))

    # From here on the archive is open; make sure it is closed again no
    # matter how loading ends.
    try:
        wb = Workbook()

        if use_iterators:
            wb._set_optimized_read()

        try:
            _load_workbook(wb, archive, filename, use_iterators)
        except KeyError:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    finally:
        archive.close()

    return wb
def get_squared_range(p, min_col, min_row, max_col, max_row, string_table, style_table, shared_date):
    """Yield the requested area row by row as tuples of cells.

    Rows come from ``get_rows`` as ``(row, cells)`` pairs.  Columns
    ``min_col`` up to, but not including, ``max_col`` are reported for every
    row; positions without a stored cell are filled from
    ``get_missing_cells``.  Row numbers skipped before a row that has data
    are yielded as rows made up entirely of such fillers.  Nothing is
    yielded after the last row with data.

    Stored cells are converted before being yielded: the number format is
    taken from ``style_table``, and the raw value is turned into an entry of
    ``string_table``, a bool, a date (via ``shared_date.from_julian``), a
    float or a unicode string depending on the cell's data type.
    """
    columns = [get_column_letter(idx) for idx in xrange(min_col, max_col)]
    next_row = min_row

    row_source = get_rows(p, min_row=min_row, max_row=max_row,
                          min_column=min_col, max_column=max_col)
    for row, row_cells in row_source:
        # Filler rows for row numbers the data skipped; the range is empty
        # when there is no gap.
        for absent_row in xrange(next_row, row):
            placeholders = get_missing_cells(absent_row, columns)
            yield tuple([placeholders[letter] for letter in columns])

        found = dict((c.column, c) for c in list(row_cells))
        absent = list(set(columns) - set(found.keys()))
        fillers = get_missing_cells(row, absent)

        output = []
        for letter in columns:
            if letter not in found:
                output.append(fillers[letter])
                continue

            cell = found[letter]
            if cell.style_id is not None:
                fmt = style_table[int(cell.style_id)].number_format.format_code
                cell = cell._replace(number_format=fmt) #pylint: disable-msg=W0212

            raw = cell.internal_value
            if raw is not None:
                if cell.data_type in Cell.TYPE_STRING:
                    converted = unicode(string_table[int(raw)])
                    cell = cell._replace(internal_value=converted) #pylint: disable-msg=W0212
                elif cell.data_type == Cell.TYPE_BOOL:
                    cell = cell._replace(internal_value=(raw == '1'))
                elif cell.is_date:
                    converted = shared_date.from_julian(float(raw))
                    cell = cell._replace(internal_value=converted)
                elif cell.data_type == Cell.TYPE_NUMERIC:
                    cell = cell._replace(internal_value=float(raw))
                elif cell.data_type in (Cell.TYPE_INLINE,
                                        Cell.TYPE_FORMULA_CACHE_STRING):
                    cell = cell._replace(internal_value=unicode(raw))
            output.append(cell)

        next_row = row + 1
        yield tuple(output)
Пример #7
0
 def check_string(self, value):
     """Check string coding, length, and line break character

     :param value: text to normalise; anything that is not already a
         unicode string is decoded with ``self.encoding``
     :return: unicode string cut to at most 32,767 characters in which
         every CR LF pair and every remaining lone CR is a single LF
     """
     # convert to unicode string
     if not isinstance(value, unicode):
         value = unicode(value, self.encoding)
     value = unicode(value)
     # string must never be longer than 32,767 characters
     # truncate if necessary
     value = value[:32767]
     # we require that newline is represented as "\n" in core,
     # not as "\r\n" or "\r"; pairs are folded first so that each one
     # becomes a single newline, then any lone "\r" that is left
     value = value.replace('\r\n', '\n').replace('\r', '\n')
     return value
Пример #8
0
 def check_string(self, value):
     """Check string coding, length, and line break character

     :param value: text to normalise; anything that is not already a
         unicode string is decoded with ``self.encoding``
     :return: unicode string cut to at most 32,767 characters in which
         every CR LF pair and every remaining lone CR is a single LF
     """
     # convert to unicode string
     if not isinstance(value, unicode):
         value = unicode(value, self.encoding)
     value = unicode(value)
     # string must never be longer than 32,767 characters
     # truncate if necessary
     value = value[:32767]
     # we require that newline is represented as "\n" in core,
     # not as "\r\n" or "\r"; pairs are folded first so that each one
     # becomes a single newline, then any lone "\r" that is left
     value = value.replace("\r\n", "\n").replace("\r", "\n")
     return value
Пример #9
0
def test_get_xml_iter():
    """Check what ``_get_xml_iter`` returns for each kind of input.

    Inputs exercised: an empty string, its unicode form, a temporary file
    object, and a member opened from a zip archive.
    """
    #1 file object
    #2 stream (file-like)
    #3 string
    #4 zipfile
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    from zipfile import ZipFile
    FUT = _get_xml_iter

    s = ""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    try:
        stream = FUT(f)
        # NOTE(review): ``tempfile`` has to name a type for isinstance to
        # work; presumably a module-level alias defined elsewhere - confirm.
        assert isinstance(stream, tempfile), type(stream)
    finally:
        # close even when the assertion fails
        f.close()

    t = TemporaryFile()
    try:
        z = ZipFile(t, mode="w")
        try:
            z.writestr("test", "whatever")
            stream = FUT(z.open("test"))
            assert hasattr(stream, "read")
        finally:
            z.close()
    finally:
        # the backing temporary file was previously left open
        t.close()
Пример #10
0
def test_get_xml_iter():
    """Check what ``_get_xml_iter`` returns for each kind of input.

    Inputs exercised: an empty string, its unicode form, a temporary file
    object, and a member opened from a zip archive.
    """
    # 1 file object
    # 2 stream (file-like)
    # 3 string
    # 4 zipfile
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    from zipfile import ZipFile

    FUT = _get_xml_iter

    s = ""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode="rb+", prefix="openpyxl.", suffix=".unpack.temp")
    try:
        stream = FUT(f)
        # NOTE(review): ``tempfile`` has to name a type for isinstance to
        # work; presumably a module-level alias defined elsewhere - confirm.
        assert isinstance(stream, tempfile), type(stream)
    finally:
        # close even when the assertion fails
        f.close()

    t = TemporaryFile()
    try:
        z = ZipFile(t, mode="w")
        try:
            z.writestr("test", "whatever")
            stream = FUT(z.open("test"))
            assert hasattr(stream, "read")
        finally:
            z.close()
    finally:
        # the backing temporary file was previously left open
        t.close()
Пример #11
0
def test_read_complex_formulae():
    """Load ``reader/formulae.xlsx`` and check how formulae are read.

    Covers plain formulae, a formula containing non-ASCII text, shared
    formulae and an array formula, including the entries recorded in
    ``ws.formula_attributes``.
    """
    formulae_file = os.path.join(DATADIR, 'reader', 'formulae.xlsx')
    wb = load_workbook(formulae_file)
    ws = wb.get_active_sheet()

    # Test normal formulae
    assert ws.cell('A1').data_type != 'f'
    assert ws.cell('A2').data_type != 'f'
    assert ws.cell('A3').data_type == 'f'
    assert 'A3' not in ws.formula_attributes
    assert ws.cell('A3').value == '=12345'
    assert ws.cell('A4').data_type == 'f'
    assert 'A4' not in ws.formula_attributes
    assert ws.cell('A4').value == '=A2+A3'
    assert ws.cell('A5').data_type == 'f'
    assert 'A5' not in ws.formula_attributes
    assert ws.cell('A5').value == '=SUM(A2:A4)'

    # Test unicode
    expected = '=IF(ISBLANK(B16), "Düsseldorf", B16)'
    # Hack to prevent pytest doing its own unicode conversion
    try:
        expected = unicode(expected, "UTF8")
    except TypeError:
        pass
    assert ws['A16'].value == expected

    # Test shared formulae
    assert ws.cell('B7').data_type == 'f'
    assert ws.formula_attributes['B7']['t'] == 'shared'
    assert ws.formula_attributes['B7']['si'] == '0'
    assert ws.formula_attributes['B7']['ref'] == 'B7:E7'
    assert ws.cell('B7').value == '=B4*2'
    assert ws.cell('C7').data_type == 'f'
    assert ws.formula_attributes['C7']['t'] == 'shared'
    assert ws.formula_attributes['C7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['C7']
    assert ws.cell('C7').value == '='
    assert ws.cell('D7').data_type == 'f'
    assert ws.formula_attributes['D7']['t'] == 'shared'
    assert ws.formula_attributes['D7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['D7']
    assert ws.cell('D7').value == '='
    assert ws.cell('E7').data_type == 'f'
    assert ws.formula_attributes['E7']['t'] == 'shared'
    assert ws.formula_attributes['E7']['si'] == '0'
    assert 'ref' not in ws.formula_attributes['E7']
    assert ws.cell('E7').value == '='

    # Test array formulae
    assert ws.cell('C10').data_type == 'f'
    # The array formula must record its range; the value is checked below.
    # (The previous check looked for the substring 'ref' inside the range
    # string itself, which could never fail.)
    assert 'ref' in ws.formula_attributes['C10']
    assert ws.formula_attributes['C10']['t'] == 'array'
    assert 'si' not in ws.formula_attributes['C10']
    assert ws.formula_attributes['C10']['ref'] == 'C10:C14'
    assert ws.cell('C10').value == '=SUM(A10:A14*B10:B14)'
    assert ws.cell('C11').data_type != 'f'
Пример #12
0
def get_string(xmlns, string_index_node):
    """Return the text of one string index entry.

    When the node has ``r`` children in the *xmlns* namespace, the text of
    each is read with ``get_text`` and the pieces are concatenated in
    document order.  Otherwise the node itself is passed to ``get_text``.
    """
    runs = string_index_node.findall(QName(xmlns, 'r').text)
    if not runs:
        return get_text(xmlns, string_index_node)

    pieces = [get_text(xmlns, run) for run in runs]
    return unicode(''.join(pieces))
Пример #13
0
def get_string(string_index_node):
    """Return the text of one string index entry.

    When the node has ``r`` children in the ``SHEET_MAIN_NS`` namespace,
    the text of each is read with ``get_text`` and the pieces are
    concatenated in document order.  Otherwise the node itself is passed to
    ``get_text``.
    """
    runs = string_index_node.findall('{%s}r' % SHEET_MAIN_NS)
    if not runs:
        return get_text(string_index_node)

    pieces = [get_text(run) for run in runs]
    return unicode(''.join(pieces))
Пример #14
0
class NamedRange(object):
    """A named group of cells

    Scope is a worksheet object or None for workbook scope names (the default)

    ``destinations`` is iterated as ``(sheet, name)`` pairs when the range
    is rendered as text.
    """
    __slots__ = ('name', 'destinations', 'scope')

    # Templates used by ``__str__`` (per destination) and ``__repr__``.
    str_format = unicode('%s!%s')
    repr_format = unicode('<%s "%s">')

    def __init__(self, name, destinations, scope=None):
        self.scope = scope
        self.destinations = destinations
        self.name = name

    def __str__(self):
        """Return every destination as ``sheet!name``, comma separated."""
        rendered = []
        for sheet, cells in self.destinations:
            rendered.append(self.str_format % (sheet, cells))
        return ','.join(rendered)

    def __repr__(self):
        """Return the class name together with the ``str`` form."""
        return self.repr_format % (type(self).__name__, str(self))
Пример #15
0
def assert_equals_file_content(reference_file, fixture, filetype = 'xml'):
    """Fail with a unified diff when *fixture* differs from a reference file.

    :param reference_file: path of the file holding the expected content
    :param fixture: path of an existing file to read, or the content itself
    :param filetype: with ``'xml'`` both sides are parsed, passed through
        ``pretty_indent`` and serialised again before being compared
    :raises AssertionError: when the line-by-line diff is not empty
    """
    # Local import: only ``pprint`` itself is known to be imported by the
    # module, and the diff is wanted as text rather than written to a stream.
    from pprint import pformat

    def _read(path):
        # Return the whole content of *path*, always closing the handle.
        handle = open(path)
        try:
            return handle.read()
        finally:
            handle.close()

    def _normalise_xml(content):
        # Parse, re-indent and serialise so formatting differences vanish.
        root = fromstring(content)
        pretty_indent(root)
        buf = BytesIO()
        ElementTree(root).write(buf)
        return buf.getvalue()

    def _as_lines(content):
        # The serialised XML is a byte string; decode it explicitly, since
        # calling ``unicode`` on bytes yields their repr on Python 3.
        if not isinstance(content, unicode):
            content = content.decode('utf-8')
        return content.split('\n')

    if os.path.isfile(fixture):
        fixture_content = _read(fixture)
    else:
        fixture_content = fixture
    expected_content = _read(reference_file)

    if filetype == 'xml':
        fixture_content = _normalise_xml(fixture_content)
        expected_content = _normalise_xml(expected_content)

    differences = list(difflib.unified_diff(_as_lines(expected_content),
                                            _as_lines(fixture_content)))
    if differences:
        # Raised explicitly so the check survives ``python -O``; formatted
        # as text because a bytes buffer cannot take pprint's output on
        # Python 3.
        raise AssertionError('Differences found : %s' % pformat(differences))
Пример #16
0
def test_get_xml_iter():
    """Check what ``_get_xml_iter`` returns for strings and a file object.

    Inputs exercised: an empty string, its unicode form, and a temporary
    file object.
    """
    from openpyxl.reader.worksheet import _get_xml_iter
    from tempfile import TemporaryFile
    FUT = _get_xml_iter

    s = ""
    stream = FUT(s)
    assert isinstance(stream, BytesIO), type(stream)

    u = unicode(s)
    stream = FUT(u)
    assert isinstance(stream, BytesIO), type(stream)

    f = TemporaryFile(mode='rb+', prefix='openpyxl.', suffix='.unpack.temp')
    try:
        stream = FUT(f)
        # NOTE(review): ``tempfile`` has to name a type for isinstance to
        # work; presumably a module-level alias defined elsewhere - confirm.
        assert isinstance(stream, tempfile), type(stream)
    finally:
        # close even when the assertion fails
        f.close()
Пример #17
0
class Worksheet(object):
    """Represents a worksheet.

    Do not create worksheets yourself,
    use :func:`openpyxl.workbook.Workbook.create_sheet` instead

    """
    repr_format = unicode('<Worksheet "%s">')

    BREAK_NONE = 0
    BREAK_ROW = 1
    BREAK_COLUMN = 2

    SHEETSTATE_VISIBLE = 'visible'
    SHEETSTATE_HIDDEN = 'hidden'
    SHEETSTATE_VERYHIDDEN = 'veryHidden'

    # Paper size
    PAPERSIZE_LETTER = '1'
    PAPERSIZE_LETTER_SMALL = '2'
    PAPERSIZE_TABLOID = '3'
    PAPERSIZE_LEDGER = '4'
    PAPERSIZE_LEGAL = '5'
    PAPERSIZE_STATEMENT = '6'
    PAPERSIZE_EXECUTIVE = '7'
    PAPERSIZE_A3 = '8'
    PAPERSIZE_A4 = '9'
    PAPERSIZE_A4_SMALL = '10'
    PAPERSIZE_A5 = '11'

    # Page orientation
    ORIENTATION_PORTRAIT = 'portrait'
    ORIENTATION_LANDSCAPE = 'landscape'

    def __init__(self, parent_workbook, title='Sheet'):
        """Set up an empty sheet attached to *parent_workbook*.

        :param parent_workbook: workbook that owns the sheet
        :param title: sheet title; a false value selects ``'Sheet<n>'`` with
            *n* one more than the number of worksheets in the parent
        """
        self._parent = parent_workbook
        self._title = ''
        # Goes through the ``title`` property, so ``_set_title`` validates
        # the value; ``_parent`` has to be assigned before this point.
        self.title = title if title else 'Sheet%d' % (
            1 + len(self._parent.worksheets))

        # Containers for sheet content
        self.row_dimensions = {}
        self.column_dimensions = {}
        self._cells = {}
        self._styles = {}
        self._charts = []
        self._images = []
        self._merged_cells = []
        self.relationships = []
        self._data_validations = []

        # Selection and visibility
        self.selected_cell = 'A1'
        self.active_cell = 'A1'
        self.sheet_state = self.SHEETSTATE_VISIBLE

        # Page and view settings
        self.page_setup = PageSetup()
        self.page_margins = PageMargins()
        self.header_footer = HeaderFooter()
        self.sheet_view = SheetView()
        self.protection = SheetProtection()
        self.show_gridlines = True
        self.print_gridlines = False
        self.show_summary_below = True
        self.show_summary_right = True
        self.default_row_dimension = RowDimension()
        self.default_column_dimension = ColumnDimension()

        # Optional settings, unset until assigned
        self._auto_filter = None
        self._freeze_panes = None
        self.paper_size = None
        self.orientation = None

    def __repr__(self):
        return self.repr_format % self.title

    @property
    def parent(self):
        return self._parent

    @property
    def encoding(self):
        return self._parent.encoding

    def garbage_collect(self):
        """Delete cells that are not storing a value.

        A cell is removed when it is not merged, its value is ``''`` or
        ``None``, and either no style is registered for its coordinate or
        the hash of its style equals ``_DEFAULTS_STYLE_HASH``.
        """
        stale = []
        for coordinate, cell in iteritems(self._cells):
            if cell.merged:
                continue
            if cell.value not in ('', None):
                continue
            if (coordinate not in self._styles
                    or hash(cell.style) == _DEFAULTS_STYLE_HASH):
                stale.append(coordinate)

        # Deleting in a second pass keeps the dict unchanged while it is
        # being iterated.
        for coordinate in stale:
            del self._cells[coordinate]

    def get_cell_collection(self):
        """Return an unordered list of the cells in this worksheet."""
        return self._cells.values()

    def _set_title(self, value):
        """Set a sheet title, ensuring it is valid."""
        bad_title_char_re = re.compile(r'[\\*?:/\[\]]')
        if bad_title_char_re.search(value):
            msg = 'Invalid character found in sheet title'
            raise SheetTitleException(msg)

        # check if sheet_name already exists
        # do this *before* length check
        if self._parent.get_sheet_by_name(value):
            # use name, but append with lowest possible integer
            i = 1
            while self._parent.get_sheet_by_name('%s%d' % (value, i)):
                i += 1
            value = '%s%d' % (value, i)
        if len(value) > 31:
            msg = 'Maximum 31 characters allowed in sheet title'
            raise SheetTitleException(msg)
        self._title = value

    def _get_title(self):
        """Return the title for this sheet."""
        return self._title

    # Public ``title`` attribute: reads return the stored title, assignments
    # go through ``_set_title``.
    title = property(_get_title,
                     _set_title,
                     doc='Get or set the title of the worksheet. '
                     'Limited to 31 characters, no special characters.')

    def _set_auto_filter(self, range):
        # Normalize range to a str or None
        if not range:
            range = None
        elif isinstance(range, str):
            range = range.upper()
        else:  # Assume a range
            range = range[0][0].address + ':' + range[-1][-1].address
        self._auto_filter = range

    def _get_auto_filter(self):
        return self._auto_filter

    # Public ``auto_filter`` attribute: assignments are normalised by
    # ``_set_auto_filter`` before being stored.
    auto_filter = property(_get_auto_filter,
                           _set_auto_filter,
                           doc='get or set auto filtering on columns')

    def _set_freeze_panes(self, topLeftCell):
        if not topLeftCell:
            topLeftCell = None
        elif isinstance(topLeftCell, str):
            topLeftCell = topLeftCell.upper()
        else:  # Assume a cell
            topLeftCell = topLeftCell.address
        if topLeftCell == 'A1':
            topLeftCell = None
        self._freeze_panes = topLeftCell

    def _get_freeze_panes(self):
        return self._freeze_panes

    # Public ``freeze_panes`` attribute: assignments are normalised by
    # ``_set_freeze_panes`` before being stored.
    freeze_panes = property(_get_freeze_panes,
                            _set_freeze_panes,
                            doc="Get or set frozen panes")

    def cell(self, coordinate=None, row=None, column=None):
        """Returns a cell object based on the given coordinates.

        Usage: cell(coodinate='A15') **or** cell(row=15, column=1)

        If `coordinates` are not given, then row *and* column must be given.

        Cells are kept in a dictionary which is empty at the worksheet
        creation.  Calling `cell` creates the cell in memory when they
        are first accessed, to reduce memory usage.

        :param coordinate: coordinates of the cell (e.g. 'B12')
        :type coordinate: string

        :param row: row index of the cell (e.g. 4)
        :type row: int

        :param column: column index of the cell (e.g. 3)
        :type column: int

        :raise: InsufficientCoordinatesException when coordinate or (row and column) are not given

        :rtype: :class:`openpyxl.cell.Cell`

        """
        if not coordinate:
            if (row is None or column is None):
                msg = "You have to provide a value either for " \
                        "'coordinate' or for 'row' *and* 'column'"
                raise InsufficientCoordinatesException(msg)
            else:
                coordinate = '%s%s' % (get_column_letter(column + 1), row + 1)
        else:
            coordinate = coordinate.replace('$', '')

        return self._get_cell(coordinate)

    def _get_cell(self, coordinate):

        if not coordinate in self._cells:
            column, row = coordinate_from_string(coordinate)
            new_cell = openpyxl.cell.Cell(self, column, row)
            self._cells[coordinate] = new_cell
            if column not in self.column_dimensions:
                self.column_dimensions[column] = ColumnDimension(column)
            if row not in self.row_dimensions:
                self.row_dimensions[row] = RowDimension(row)
        return self._cells[coordinate]

    def get_highest_row(self):
        """Returns the maximum row index containing data

        :rtype: int
        """
        if self.row_dimensions:
            return max(self.row_dimensions.keys())
        else:
            return 1

    def get_highest_column(self):
        """Get the largest value for column currently stored.

        :rtype: int
        """
        if self.column_dimensions:
            return max([
                column_index_from_string(column_index)
                for column_index in self.column_dimensions
            ])
        else:
            return 1

    def calculate_dimension(self):
        """Return the range from ``A1`` to the highest column and row."""
        last_column = get_column_letter(self.get_highest_column())
        last_row = self.get_highest_row()
        return 'A1:%s%d' % (last_column, last_row)

    def range(self, range_string, row=0, column=0):
        """Returns a 2D array of cells, with optional row and column offsets.

        A string containing ``:`` is read as a cell range.  Anything else is
        first tried as a single cell coordinate and, if that raises
        ``CellCoordinatesException``, looked up as a named range of the
        parent workbook.

        :param range_string: cell range string or `named range` name
        :type range_string: string

        :param row: number of rows to offset
        :type row: int

        :param column: number of columns to offset
        :type column: int

        :raise: NamedRangeException when the name is unknown, refers to a
            value, or has a destination on another worksheet

        :rtype: tuples of tuples of :class:`openpyxl.cell.Cell`

        """
        if ':' in range_string:
            # R1C1 range
            first, last = range_string.split(':')
            min_col, min_row = coordinate_from_string(first)
            max_col, max_row = coordinate_from_string(last)
            if column:
                # shift both ends by the column offset, staying in letters
                min_col = get_column_letter(
                    column_index_from_string(min_col) + column)
                max_col = get_column_letter(
                    column_index_from_string(max_col) + column)
            min_col = column_index_from_string(min_col)
            max_col = column_index_from_string(max_col)

            col_indexes = xrange(min_col, max_col + 1)
            # letters are looked up once per column, not once per cell
            letters = {}
            for idx in col_indexes:
                letters[idx] = get_column_letter(idx)

            grid = []
            for row_idx in xrange(min_row + row, max_row + row + 1):
                grid.append(tuple(
                    [self.cell('%s%s' % (letters[idx], row_idx))
                     for idx in col_indexes]))
            return tuple(grid)

        try:
            return self.cell(coordinate=range_string,
                             row=row,
                             column=column)
        except CellCoordinatesException:
            pass

        # named range
        named_range = self._parent.get_named_range(range_string)
        if named_range is None:
            raise NamedRangeException(
                '%s is not a valid range name' % range_string)
        if isinstance(named_range, NamedRangeContainingValue):
            raise NamedRangeException(
                '%s refers to a value, not a range' % range_string)

        collected = []
        for worksheet, cells_range in named_range.destinations:
            if worksheet is not self:
                msg = 'Range %s is not defined on worksheet %s' % \
                        (cells_range, self.title)
                raise NamedRangeException(msg)

            content = self.range(cells_range)
            if isinstance(content, tuple):
                # a 2D result: flatten it row by row
                for cells in content:
                    collected.extend(cells)
            else:
                collected.append(content)

        if len(collected) == 1:
            return collected[0]
        return tuple(collected)

    def get_style(self, coordinate):
        """Return the style object for the specified cell."""
        if not coordinate in self._styles:
            self._styles[coordinate] = Style()
        return self._styles[coordinate]

    def set_printer_settings(self, paper_size, orientation):
        """Set printer settings """

        self.paper_size = paper_size
        assert orientation in (
            self.ORIENTATION_PORTRAIT,
            self.ORIENTATION_LANDSCAPE), "Values should be %s or %s" % (
                self.ORIENTATION_PORTRAIT, self.ORIENTATION_LANDSCAPE)
        self.orientation = orientation

    def create_relationship(self, rel_type):
        """Create a relationship of *rel_type*, register and return it.

        The ``id`` is ``'rId'`` followed by one more than the index at
        which the relationship is found in ``self.relationships``.
        """
        rels = self.relationships
        rel = Relationship(rel_type)
        rels.append(rel)
        position = rels.index(rel)
        rel.id = 'rId%d' % (position + 1)
        return rels[position]

    def add_data_validation(self, data_validation):
        """ Add a data-validation object to the sheet.  The data-validation
            object defines the type of data-validation to be applied and the
            cell or range of cells it should apply to.
        """
        data_validation._sheet = self
        self._data_validations.append(data_validation)

    def add_chart(self, chart):
        """ Add a chart to the sheet """

        chart._sheet = self
        self._charts.append(chart)

    def add_image(self, img):
        """ Add an image to the sheet """

        img._sheet = self
        self._images.append(img)

    def merge_cells(self,
                    range_string=None,
                    start_row=None,
                    start_column=None,
                    end_row=None,
                    end_column=None):
        """Set merge on a cell range.  Range is a cell range (e.g. A1:E1)

        The range is given either as *range_string* or through all four
        start/end values; one is added to each of those before they are
        turned into a range string.  Every cell of the range except the top
        left one has its value set to ``None`` and is flagged as merged.
        The range string is recorded once in the list of merged ranges.

        :raise: InsufficientCoordinatesException when neither form is
            complete or *range_string* does not have exactly two parts
        """
        if range_string:
            if len(range_string.split(':')) != 2:
                msg = "Range must be a cell range (e.g. A1:E1)"
                raise InsufficientCoordinatesException(msg)
            range_string = range_string.replace('$', '')
        elif start_row is None or start_column is None or end_row is None or end_column is None:
            msg = "You have to provide a value either for "\
                  "'coordinate' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = '%s%s:%s%s' % (
                get_column_letter(start_column + 1), start_row + 1,
                get_column_letter(end_column + 1), end_row + 1)

        # Make sure top_left cell exists - is this necessary?
        top_left, bottom_right = range_string.split(':')
        min_col, min_row = coordinate_from_string(top_left)
        max_col, max_row = coordinate_from_string(bottom_right)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col)

        # Blank out the rest of the cells in the range
        for col in xrange(min_col, max_col + 1):
            for row in xrange(min_row, max_row + 1):
                if row == min_row and col == min_col:
                    continue
                # PHPExcel adds cell and specifically blanks it out if it doesn't exist
                target = self._get_cell('%s%s' % (get_column_letter(col), row))
                target.value = None
                target.merged = True

        if range_string not in self._merged_cells:
            self._merged_cells.append(range_string)

    def unmerge_cells(self,
                      range_string=None,
                      start_row=None,
                      start_column=None,
                      end_row=None,
                      end_column=None):
        """Remove merge on a cell range.

        The range is given either as ``range_string`` (e.g. ``A1:E1``, any
        ``$`` markers are stripped) or as four zero-based indexes.  The
        range must already be recorded as merged on this sheet; it is then
        forgotten and every cell except the top-left one is flagged as
        unmerged.

        :param range_string: cell range such as ``A1:E1``
        :type range_string: string

        :param start_row: zero-based index of the first row
        :param start_column: zero-based index of the first column
        :param end_row: zero-based index of the last row
        :param end_column: zero-based index of the last column

        :raise: InsufficientCoordinatesException when neither a range string
            nor all four indexes are given, when the range string is not of
            the form ``start:end``, or when the range is not known as merged

        """
        if not range_string:
            indexes = (start_row, start_column, end_row, end_column)
            if any(index is None for index in indexes):
                msg = "You have to provide a value either for "\
                      "'range_string' or for 'start_row', 'start_column', 'end_row' *and* 'end_column'"
                raise InsufficientCoordinatesException(msg)
            # Indexes are zero-based, cell coordinates are one-based
            range_string = '%s%s:%s%s' % (
                get_column_letter(start_column + 1), start_row + 1,
                get_column_letter(end_column + 1), end_row + 1)
        elif len(range_string.split(':')) != 2:
            msg = "Range must be a cell range (e.g. A1:E1)"
            raise InsufficientCoordinatesException(msg)
        else:
            range_string = range_string.replace('$', '')

        if range_string not in self._merged_cells:
            msg = 'Cell range %s not known as merged.' % range_string
            raise InsufficientCoordinatesException(msg)

        self._merged_cells.remove(range_string)

        # Split the range once into its two corner coordinates
        top_left, bottom_right = range_string.split(':')
        min_col, min_row = coordinate_from_string(top_left)
        max_col, max_row = coordinate_from_string(bottom_right)
        min_col = column_index_from_string(min_col)
        max_col = column_index_from_string(max_col)

        # Mark every cell except the top-left one as unmerged
        for col in xrange(min_col, max_col + 1):
            col_letter = get_column_letter(col)
            for row in xrange(min_row, max_row + 1):
                if row == min_row and col == min_col:
                    continue
                self._get_cell('%s%s' % (col_letter, row)).merged = False

    def append(self, list_or_dict):
        """Write one group of values into a single row of the sheet.

        The target row index is the current number of row dimensions.

        * A list/tuple fills the columns in order, starting from the first one
        * A dict fills the columns named by its keys; a string key is taken
          as a column letter, any other key is used as the column index as-is

        :param list_or_dict: list or dict containing values to append
        :type list_or_dict: list/tuple or dict

        Usage:

        * append(['This is A1', 'This is B1', 'This is C1'])
        * **or** append({'A' : 'This is A1', 'C' : 'This is C1'})
        * **or** append({0 : 'This is A1', 2 : 'This is C1'})

        :raise: TypeError when list_or_dict is neither a list/tuple nor a dict

        """
        target_row = len(self.row_dimensions)

        is_sequence = isinstance(list_or_dict, (list, tuple))
        if not is_sequence and not isinstance(list_or_dict, dict):
            raise TypeError('list_or_dict must be a list or a dict')

        if is_sequence:
            for position, value in enumerate(list_or_dict):
                self.cell(row=target_row, column=position).value = value
            return

        for key, value in iteritems(list_or_dict):
            column = key
            if isinstance(key, basestring):
                # Column letters are one-based, cell indexes zero-based
                column = column_index_from_string(key) - 1
            self.cell(row=target_row, column=column).value = value

    @property
    def rows(self):
        """Return the range spanning the sheet's calculated dimension."""
        dimension = self.calculate_dimension()
        return self.range(dimension)

    @property
    def columns(self):
        """Return the sheet's cells grouped by column.

        The result is a tuple with one entry per column, from the first
        column up to the highest one; each entry is a tuple holding the
        first cell of every row of that column's range, from row 1 down to
        the highest row.
        """
        last_row = self.get_highest_row()
        column_count = self.get_highest_column()

        def _column_cells(offset):
            # Collect the single-column range for the given zero-based offset
            letter = get_column_letter(offset + 1)
            block = self.range('%s1:%s%d' % (letter, letter, last_row))
            return tuple([line[0] for line in block])

        return tuple([_column_cells(offset) for offset in range(column_count)])

    def point_pos(self, left=0, top=0):
        """Tell which cell is under the given coordinates (in pixels),
        counting from the top-left corner of the sheet.

        Can be used to locate images and charts on the worksheet.  Columns
        and rows whose stored width/height is not greater than zero, or
        which have no dimension entry, count with the default size.

        :param left: horizontal offset in pixels
        :param top: vertical offset in pixels

        :return: ``(column_letter, row_number)`` of the cell found
        """
        col_dims = self.column_dimensions
        row_dims = self.row_dimensions
        fallback_width = points_to_pixels(DEFAULT_COLUMN_WIDTH)
        fallback_height = points_to_pixels(DEFAULT_ROW_HEIGHT)

        # NOTE(review): a negative ``left`` or ``top`` skips the matching
        # loop entirely, leaving ``letter`` / ``row`` unbound at the return.

        # Walk the columns until the accumulated width passes ``left``
        x_edge = 0
        col_number = 1
        while x_edge <= left:
            letter = get_column_letter(col_number)
            col_number += 1
            advance = fallback_width
            if letter in col_dims:
                custom_width = col_dims[letter].width
                if custom_width > 0:
                    advance = points_to_pixels(custom_width)
            x_edge += advance

        # Walk the rows until the accumulated height passes ``top``
        y_edge = 0
        row_number = 1
        while y_edge <= top:
            row = row_number
            row_number += 1
            advance = fallback_height
            if row in row_dims:
                custom_height = row_dims[row].height
                if custom_height > 0:
                    advance = points_to_pixels(custom_height)
            y_edge += advance

        return (letter, row)
Пример #18
0
 def check_error(self, value):
     """Convert ``value`` to text, falling back to ``#N/A``.

     :param value: the object to convert
     :return: ``unicode(value)``, or ``'#N/A'`` when the conversion raises
     """
     try:
         return unicode(value)
     except Exception:
         # Best-effort conversion: ordinary failures yield the placeholder,
         # while KeyboardInterrupt and SystemExit still propagate.
         return unicode('#N/A')
Пример #19
0
 def __repr__(self):
     """Return ``<Cell title.coordinate>``, built from the parent's title
     and this cell's coordinate."""
     template = unicode("<Cell %s.%s>")
     return template % (self.parent.title, self.get_coordinate())
Пример #20
0
 def check_error(self, value):
     """Convert ``value`` to text, falling back to ``#N/A``.

     :param value: the object to convert
     :return: ``unicode(value)``, or ``'#N/A'`` when the conversion raises
     """
     try:
         return unicode(value)
     except Exception:
         # Best-effort conversion: ordinary failures yield the placeholder,
         # while KeyboardInterrupt and SystemExit still propagate.
         return unicode('#N/A')
Пример #21
0
def load_workbook(filename,
                  use_iterators=False,
                  keep_vba=False,
                  guess_types=True,
                  data_only=False):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raise: OpenModeError when a file object is not opened in binary mode
    :raise: InvalidFileException when the archive cannot be opened, even
        after attempting a repair, or when loading its content raises KeyError

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.reader.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """

    try:
        # Python 2
        is_file_instance = isinstance(filename, file)
    except NameError:
        # Python 3: the ``file`` builtin no longer exists
        from io import BufferedReader
        is_file_instance = isinstance(filename, BufferedReader)

    # fileobject must have been opened with 'rb' flag
    # it is required by zipfile
    if is_file_instance and 'b' not in filename.mode:
        raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # Give the file one chance to be repaired before rejecting it
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    wb = Workbook(guess_types=guess_types, data_only=data_only)

    if use_iterators:
        wb._set_optimized_read()
        if not guess_types:
            warnings.warn('please note that data types are not guessed '
                          'when using iterator reader, so you do not need '
                          'to use guess_types=False')

    loaded = False
    try:
        _load_workbook(wb, archive, filename, use_iterators, keep_vba)
        loaded = True
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    finally:
        # The archive stays open only for a successfully loaded workbook
        # with keep_vba set; otherwise, including on failure, close it so
        # the handle is not leaked.
        if not (loaded and keep_vba):
            archive.close()
    return wb
Пример #22
0
 def __repr__(self):
     """Describe the cell by its parent's title and its own coordinate."""
     pattern = unicode("<Cell %s.%s>")
     parts = (self.parent.title, self.get_coordinate())
     return pattern % parts
Пример #23
0
def load_workbook(filename, use_iterators=False, keep_vba=False, guess_types=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param use_iterators: use lazy load for cells
    :type use_iterators: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raise: OpenModeError when a file object is not opened in binary mode
    :raise: InvalidFileException when the archive cannot be opened, even
        after attempting a repair, or when loading its content raises KeyError

    .. note::

        When using lazy load, all worksheets will be :class:`openpyxl.reader.iter_worksheet.IterableWorksheet`
        and the returned workbook will be read-only.

    """

    try:
        # Python 2
        is_file_instance = isinstance(filename, file)
    except NameError:
        # Python 3
        from io import BufferedReader
        is_file_instance = isinstance(filename, BufferedReader)

    # fileobject must have been opened with 'rb' flag
    # it is required by zipfile
    if is_file_instance and 'b' not in filename.mode:
        raise OpenModeError("File-object must be opened in binary mode")

    try:
        archive = ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        # Give the file one chance to be repaired before rejecting it
        try:
            f = repair_central_directory(filename, is_file_instance)
            archive = ZipFile(f, 'r', ZIP_DEFLATED)
        except BadZipfile:
            e = exc_info()[1]
            raise InvalidFileException(unicode(e))
    except (RuntimeError, IOError, ValueError):
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    wb = Workbook(guess_types=guess_types)

    if use_iterators:
        wb._set_optimized_read()
        if not guess_types:
            warnings.warn('please note that data types are not guessed '
                          'when using iterator reader, so you do not need '
                          'to use guess_types=False')

    loaded = False
    try:
        _load_workbook(wb, archive, filename, use_iterators, keep_vba)
        loaded = True
    except KeyError:
        e = exc_info()[1]
        raise InvalidFileException(unicode(e))
    finally:
        # The archive stays open only for a successfully loaded workbook
        # with keep_vba set; otherwise, including on failure, close it so
        # the handle is not leaked.
        if not (loaded and keep_vba):
            archive.close()
    return wb