def copy_row(ws, source_row, dest_row, copy_data=False, copy_style=True, copy_merged_columns=True): """Copia uma linha da planilha para a linha imediatamento abaixo mantendo todos os atributos.""" CELL_RE = re.compile("(?P<col>\$?[A-Z]+)(?P<row>\$?\d+)") def replace(m): row = m.group('row') prefix = "$" if row.find("$") != -1 else "" row = int(row.replace("$", "")) row += dest_row if row > source_row else 0 return m.group('col') + prefix + str(row) # Fazendo unmerge das celulas de destino. mergedcells = [] for group in ws.merged_cells.ranges: mergedcells.append(group) for cr in mergedcells: min_col, min_row, max_col, max_row = range_boundaries(str(cr)) if max_row == min_row == source_row + 1: ws.unmerge_cells(str(cr)) new_row_idx = source_row + 1 for row in range(new_row_idx, new_row_idx + dest_row): new_rd = copy(ws.row_dimensions[row - 1]) new_rd.index = row ws.row_dimensions[row] = new_rd for col in range(ws.max_column): col = get_column_letter(col + 1) cell = ws['%s%d' % (col, row)] source = ws['%s%d' % (col, row - 1)] if copy_style: cell._style = copy(source._style) if copy_data and not isinstance(cell, MergedCell): cell.data_type = source.data_type cell.value = source.value for cr_idx, cr in enumerate(ws.merged_cell_ranges): ws.merged_cell_ranges[cr_idx] = CELL_RE.sub(replace, str(cr)) if copy_merged_columns: for cr in ws.merged_cells.ranges: min_col, min_row, max_col, max_row = range_boundaries(str(cr)) if max_row == min_row == source_row: for row in range(new_row_idx, new_row_idx + dest_row): newCellRange = get_column_letter(min_col) + str( row) + ":" + get_column_letter(max_col) + str(row) ws.merge_cells(newCellRange)
def set_border_is(ws, cell_range,bg=WHITE,align=True): font = Font(name='Arial',size=10,bold=False) if align: alignment=Alignment(horizontal='center',vertical='center',wrap_text=False,shrink_to_fit=True) else: alignment=Alignment(wrap_text=False,shrink_to_fit=True) if bg != WHITE: style_border = Style(border=Border(left=Side(border_style=BORDER_MEDIUM), right=Side(border_style=BORDER_MEDIUM), top=Side(border_style=BORDER_MEDIUM), bottom=Side(border_style=BORDER_MEDIUM)), alignment=alignment, fill=PatternFill(patternType='solid',start_color=bg), font=font) else: style_border = Style(border=Border(left=Side(border_style=BORDER_MEDIUM), right=Side(border_style=BORDER_MEDIUM), top=Side(border_style=BORDER_MEDIUM), bottom=Side(border_style=BORDER_MEDIUM)), alignment=alignment, font=font) #row = ws.iter_rows(cell_range) min_col, min_row, max_col, max_row = range_boundaries(cell_range.upper()) #print "TEST:",min_col, min_row, max_col, max_row for index_row, rows in enumerate(ws.iter_rows(cell_range)): #for row in rows: index_column = 0 for row in rows: Style.setStyleRow(row,style_border) #row.style = style_border index_column +=1
def __init__(self, range_string=None, min_col=None, min_row=None, max_col=None, max_row=None, title=None): if range_string is not None: try: title, (min_col, min_row, max_col, max_row) = range_to_tuple(range_string) except ValueError: min_col, min_row, max_col, max_row = range_boundaries( range_string) # None > 0 is False if not all(idx > 0 for idx in (min_col, min_row, max_col, max_row)): msg = "Values for 'min_col', 'min_row', 'max_col' *and* 'max_row_' " \ "must be strictly positive" raise ValueError(msg) # Intervals are inclusive if not min_col <= max_col: fmt = "{max_col} must be greater than {min_col}" raise ValueError(fmt.format(min_col=min_col, max_col=max_col)) if not min_row <= max_row: fmt = "{max_row} must be greater than {min_row}" raise ValueError(fmt.format(min_row=min_row, max_row=max_row)) self.min_col = min_col self.min_row = min_row self.max_col = max_col self.max_row = max_row self.title = title
def __init__(self, range_string=None, min_col=None, min_row=None, max_col=None, max_row=None, title=None): if range_string is not None: if "!" in range_string: title, (min_col, min_row, max_col, max_row) = range_to_tuple(range_string) else: min_col, min_row, max_col, max_row = range_boundaries( range_string) self.min_col = min_col self.min_row = min_row self.max_col = max_col self.max_row = max_row self.title = title if min_col > max_col: fmt = "{max_col} must be greater than {min_col}" raise ValueError(fmt.format(min_col=min_col, max_col=max_col)) if min_row > max_row: fmt = "{max_row} must be greater than {min_row}" raise ValueError(fmt.format(min_row=min_row, max_row=max_row))
def merge_cells(self, range_string=None, start_row=None, start_column=None, end_row=None, end_column=None): """ Set merge on a cell range. Range is a cell range (e.g. A1:E1) """ if not range_string and not all((start_row, start_column, end_row, end_column)): msg = "You have to provide a value either for 'coordinate' or for\ 'start_row', 'start_column', 'end_row' *and* 'end_column'" raise ValueError(msg) elif not range_string: range_string = '%s%s:%s%s' % (get_column_letter(start_column), start_row, get_column_letter(end_column), end_row) elif ":" not in range_string: if COORD_RE.match(range_string): return # Single cell, do nothing raise ValueError("Range must be a cell range (e.g. A1:E1)") else: range_string = range_string.replace('$', '') if range_string not in self._merged_cells: self._merged_cells.append(range_string) min_col, min_row, max_col, max_row = range_boundaries(range_string) rows = range(min_row, max_row+1) cols = range(min_col, max_col+1) cells = product(rows, cols) # all but the top-left cell are removed for c in islice(cells, 1, None): if c in self._cells: del self._cells[c]
def iter_rows(self, range_string=None, row_offset=0, column_offset=0): """ Returns a squared range based on the `range_string` parameter, using generators. If no range is passed, will iterate over all cells in the worksheet :param range_string: range of cells (e.g. 'A1:C4') :type range_string: string :param row_offset: additional rows (e.g. 4) :type row: int :param column_offset: additonal columns (e.g. 3) :type column: int :rtype: generator """ if range_string is not None: min_col, min_row, max_col, max_row = range_boundaries( range_string.upper()) else: min_col, min_row, max_col, max_row = (1, 1, self.max_column, self.max_row) if max_col is not None: max_col += column_offset if max_row is not None: max_row += row_offset return self.get_squared_range(min_col + column_offset, min_row + row_offset, max_col, max_row)
def iter_rows(self, range_string=None, min_row=None, max_row=None, min_col=None, max_col=None, row_offset=0, column_offset=0): """ Return cells from the worksheet as rows. Boundaries for the cells can be passed in either as indices of rows and columns. If no boundaries are passed in the cells will start at A1. If no cells are in the worksheet an empty tuple will be returned. Additional rows and columns can be created using offsets. :param min_col: smallest column index (1-based index) :type min_col: int :param min_row: smallest row index (1-based index) :type min_row: int :param max_col: largest column index (1-based index) :type max_col: int :param max_row: smallest row index (1-based index) :type max_row: int :param row_offset: additional rows (e.g. 4) :type row: int :param column_offset: additonal columns (e.g. 3) :type column: int :rtype: generator """ if range_string is not None: warn("Using a range string is deprecated. Use ws[range_string]") min_col, min_row, max_col, max_row = range_boundaries( range_string.upper()) if self._current_row == 0 and not any( [min_col, min_row, max_col, max_row]): return () min_col = min_col or 1 min_row = min_row or 1 max_col = max_col or self.max_column max_row = max_row or self.max_row if max_col is not None: max_col += column_offset if max_row is not None: max_row += row_offset return self.get_squared_range(min_col + column_offset, min_row + row_offset, max_col, max_row)
def iter_rows(self, range_string=None, row_offset=0, column_offset=0): """ Returns a squared range based on the `range_string` parameter, using generators. If no range is passed, will iterate over all cells in the worksheet :param range_string: range of cells (e.g. 'A1:C4') :type range_string: string :param row_offset: additional rows (e.g. 4) :type row: int :param column_offset: additonal columns (e.g. 3) :type column: int :rtype: generator """ if range_string is not None: min_col, min_row, max_col, max_row = range_boundaries(range_string.upper()) else: min_col, min_row, max_col, max_row = (1, 1, self.max_column, self.max_row) if max_col is not None: max_col += column_offset if max_row is not None: max_row += row_offset return self.get_squared_range(min_col + column_offset, min_row + row_offset, max_col, max_row)
def merge_cells(self, range_string=None, start_row=None, start_column=None, end_row=None, end_column=None): """ Set merge on a cell range. Range is a cell range (e.g. A1:E1) """ if not range_string and not all( (start_row, start_column, end_row, end_column)): msg = "You have to provide a value either for 'coordinate' or for\ 'start_row', 'start_column', 'end_row' *and* 'end_column'" raise ValueError(msg) elif not range_string: range_string = '%s%s:%s%s' % ( get_column_letter(start_column), start_row, get_column_letter(end_column), end_row) elif ":" not in range_string: if COORD_RE.match(range_string): return # Single cell, do nothing raise ValueError("Range must be a cell range (e.g. A1:E1)") else: range_string = range_string.replace('$', '') if range_string not in self._merged_cells: self._merged_cells.append(range_string) min_col, min_row, max_col, max_row = range_boundaries(range_string) rows = range(min_row, max_row + 1) cols = range(min_col, max_col + 1) cells = product(rows, cols) # all but the top-left cell are removed for c in islice(cells, 1, None): if c in self._cells: del self._cells[c]
def set_range_as_source(self, cell_range=None): """ use sheet as source for table data :param cell_range: range of the cells within the sheet that hold the table data NOTE defaults to all the data on the chosen sheet """ ws = self.work_sheet if cell_range is None: min_col, min_row, max_col, max_row = ws.min_column, ws.min_row, ws.max_column, ws.max_row elif isinstance(cell_range, str): min_col, min_row, max_col, max_row = range_boundaries(cell_range) else: min_col, min_row, max_col, max_row = self.cell_range2components( cell_range) if not(ws.min_column <= min_col <= max_col <= ws.max_column) or\ not(ws.min_row <= min_row <= max_row <= ws.max_row): raise ExcelTableError( f"cell range ({min_col},{min_row})-({max_col},{max_row})" f" not valid for sheet '{self.sheet_name}'" f" in {self.source_file}") self.cell_range = self.components2cell_range(min_col, min_row, max_col, max_row)
def __setMergeCellsCharAndIndex(self): beifen = [ utils.range_boundaries(x) for x in self.sheet.merged_cell_ranges ] for i in beifen: t0 = utils.get_column_letter(i[0]) t2 = utils.get_column_letter(i[2]) self.mergeCellsCharAndIndex.append((t0, i[1], t2, i[3]))
def iter_rows(self, range_string=None, min_row=None, max_row=None, min_col=None, max_col=None, row_offset=0, column_offset=0): """ Return cells from the worksheet as rows. Boundaries for the cells can be passed in either as indices of rows and columns. If no boundaries are passed in the cells will start at A1. If no cells are in the worksheet an empty tuple will be returned. Additional rows and columns can be created using offsets. :param range_string: range string (e.g. 'A1:B2') *deprecated* :type range_string: string :param min_col: smallest column index (1-based index) :type min_col: int :param min_row: smallest row index (1-based index) :type min_row: int :param max_col: largest column index (1-based index) :type max_col: int :param max_row: smallest row index (1-based index) :type max_row: int :param row_offset: additional rows (e.g. 4) :type row_offset: int :param column_offset: additional columns (e.g. 3) :type column_offset: int :rtype: generator """ if range_string is not None: warn("Using a range string is deprecated. Use ws[range_string]") min_col, min_row, max_col, max_row = range_boundaries(range_string.upper()) if self._current_row == 0 and not any([min_col, min_row, max_col, max_row ]): return () min_col = min_col or 1 min_row = min_row or 1 max_col = max_col or self.max_column max_row = max_row or self.max_row if max_col is not None: max_col += column_offset if max_row is not None: max_row += row_offset return self._cells_by_row(min_col + column_offset, min_row + row_offset, max_col, max_row)
def _initialise_columns(ref): """ Create a list of table columns from a cell range """ from openpyxl.utils import range_boundaries min_col, min_row, max_col, max_row = range_boundaries(ref) for idx in range(min_col, max_col + 1): yield TableColumn(id=idx, name="Column{0}".format(idx))
def _initialise_columns(ref): """ Create a list of table columns from a cell range """ from openpyxl.utils import range_boundaries min_col, min_row, max_col, max_row = range_boundaries(ref) for idx in range(min_col, max_col+1): yield TableColumn(id=idx, name="Column{0}".format(idx))
def iter_rows(self, index): """ produce cell values by row within the given index :param index: e.g. "A2" or "F7:K51" """ min_col, min_row, max_col, max_row = range_boundaries(index) return self.iterate_by_row(min_col, min_row, max_col, max_row)
def write_rows_in_sheet_at_cell(sheet, cellstr, rows): """ Write the given rows to the current sheet starting at the given cell. It is assumed that rows is is a list of lists with each child list being the same length. Note that nothing is returned since the `sheet` object is modified directly. Parameters ---------- sheet : openpyxl.worksheet.worksheet.Worksheet openpyxl Worksheet instance in which to write the given rows. cellstr : str Alphanumeric cell location in an Excel spreadsheet, e.g., 'A26'. The first element of the first list in `rows` is written at this location. rows : list of lists A list of lists with each list containing the same number of strings. """ # figure out how many rows we are writing first num_rows_to_add = len(rows) # if there are any rows to add if num_rows_to_add > 0: # get the Cell object corresponding to the given # cell string starting_cell = sheet[cellstr] # compute the ending row index in the spreadsheet ending_row_index = starting_cell.row + num_rows_to_add - 1 # compute the new column letter in the spreadsheet new_column_letter = get_column_letter(starting_cell.column + len(rows[0]) - 1) # get the cell range that we will be modifying cell_range = '{}:{}{}'.format(cellstr, new_column_letter, ending_row_index) # get the min and max rows and columns which openpyxl needs to # iterate over the rows (min_col, min_row, max_col, max_row) = range_boundaries(cell_range) # iterate over each spreadsheet row and write each data row # to the cells in the spreadsheet row for idx, sheet_row in enumerate(sheet.iter_rows(min_col=min_col, max_col=max_col, min_row=min_row, max_row=max_row)): for cell, value in zip(sheet_row, rows[idx]): cell.value = value
def cell_in_range(row, col, rng): """ :param row: row index :param col: column index :param rng: a string describing a range :return: """ rn_coords = range_boundaries(rng) return ((rn_coords[0] <= col <= rn_coords[2]) & (rn_coords[1] <= row <= rn_coords[3]))
def iter_rows(self, range_string=None, min_row=None, max_row=None, min_col=None, max_col=None, row_offset=0, column_offset=0): """ Produces cells from the worksheet, by row. Specify the iteration range using indices of rows and columns. If no indices are specified the range starts at A1. If no cells are in the worksheet an empty tuple will be returned. :param range_string: range string (e.g. 'A1:B2') *deprecated* :type range_string: string :param min_col: smallest column index (1-based index) :type min_col: int :param min_row: smallest row index (1-based index) :type min_row: int :param max_col: largest column index (1-based index) :type max_col: int :param max_row: smallest row index (1-based index) :type max_row: int :param row_offset: added to min_row and max_row (e.g. 4) :type row_offset: int :param column_offset: added to min_col and max_col (e.g. 3) :type column_offset: int :rtype: generator """ if range_string is not None: warn("Using a range string with iter_rows is deprecated. Use ws[range_string]") min_col, min_row, max_col, max_row = range_boundaries(range_string.upper()) if self._current_row == 0 and not any([min_col, min_row, max_col, max_row ]): return () min_col = min_col or 1 min_row = min_row or 1 max_col = max_col or self.max_column max_row = max_row or self.max_row if max_col is not None: max_col += column_offset if max_row is not None: max_row += row_offset return self._cells_by_row(min_col + column_offset, min_row + row_offset, max_col, max_row)
def _initialise_columns(self): """ Create a list of table columns from a cell range Always set a ref if we have headers (the default) Column headings must be strings and must match cells in the worksheet. """ min_col, min_row, max_col, max_row = range_boundaries(self.ref) for idx in range(min_col, max_col + 1): col = TableColumn(id=idx, name="Column{0}".format(idx)) self.tableColumns.append(col) if self.headerRowCount: self.autoFilter = AutoFilter(ref=self.ref)
def _initialise_columns(self): """ Create a list of table columns from a cell range Always set a ref if we have headers (the default) Column headings must be strings and must match cells in the worksheet. """ min_col, min_row, max_col, max_row = range_boundaries(self.ref) for idx in range(min_col, max_col+1): col = TableColumn(id=idx, name="Column{0}".format(idx)) self.tableColumns.append(col) if self.headerRowCount: self.autoFilter = AutoFilter(ref=self.ref)
def _is_merged_cell(self, sheet, row, column): coor = sheet.cell(row=row, column=column).coordinate for coor in sheet.merged_cells: # return True, (row, column) for irange in sheet.merged_cell_ranges: min_col, min_row, max_col, max_row = range_boundaries(irange) if(row in range(min_row, max_row + 1) and column in range(min_col, max_col + 1)): return True, (min_row, min_col) # row_low, row_high, column_low, column_high = cell_range # if(row in xrange(row_low, row_high) and # column in xrange(column_low, column_high)): # return True, (row_low, column_low) return False, (row, column)
def format_sheet(sheet): counter = 0 while len(sheet.merged_cells.ranges) > 0: counter += 1 for cell_group in sheet.merged_cells.ranges: min_col, min_row, max_col, max_row = range_boundaries( str(cell_group)) top_left_cell_value = sheet.cell(row=min_row, column=min_col).value sheet.unmerge_cells(str(cell_group)) for row in sheet.iter_rows(min_col=min_col, min_row=min_row, max_col=max_col, max_row=max_row): for cell in row: cell.value = top_left_cell_value return counter
def __getitem__(self, key): """Convenience access by Excel style coordinates The key can be a single cell coordinate 'A1', a range of cells 'A1:D25', individual rows or columns 'A', 4 or ranges of rows or columns 'A:D', 4:10. Single cells will always be created if they do not exist. Returns either a single cell or a tuple of rows or columns. """ if isinstance(key, slice): if not all([key.start, key.stop]): raise IndexError( "{0} is not a valid coordinate or range".format(key)) key = "{0}:{1}".format(key.start, key.stop) if isinstance(key, int): key = str(key) min_col, min_row, max_col, max_row = range_boundaries(key) if not any([min_col, min_row, max_col, max_row]): raise IndexError( "{0} is not a valid coordinate or range".format(key)) if not min_row: cols = tuple(self.iter_cols(min_col, max_col)) if min_col == max_col: cols = cols[0] return cols if not min_col: rows = tuple( self.iter_rows(min_col=min_col, min_row=min_row, max_col=self.max_column, max_row=max_row)) if min_row == max_row: rows = rows[0] return rows if ":" not in key: return self._get_cell(min_row, min_col) return tuple( self.iter_rows(min_row=min_row, min_col=min_col, max_row=max_row, max_col=max_col))
def range2dataframe(op_worksheet, range_): """Converts a range of an excel sheet into a Pandas dataframe. Args: op_worksheet (:obj:'Worksheet'): The worksheet which the range is located. range_ (str): The range to convert into a dataframe. It must be in the excel standard for a range. Returns: :obj:`DataFrame`: The Pandas dataframe of the excel range. """ min_col, min_row, max_col, max_row = range_boundaries(range_) data = op_worksheet.iter_rows(min_row=min_row, min_col=min_col, max_row=max_row, max_col=max_col, values_only=True) headers = next(data) return pandas.DataFrame(data, columns=headers)
def unmerge_cell_copy_top_value(): """ :return: modified work sheet in memory """ wbook = load_workbook("./data/test2.xlsx") sheet = wbook["Consolidated"] lookup = create_merged_cell_lookup(sheet) cell_group_list = lookup.keys() for cell_group in cell_group_list: min_col, min_row, max_col, max_row = range_boundaries(str(cell_group)) sheet.unmerge_cells(str(cell_group)) for row in sheet.iter_rows(min_col=min_col, min_row=min_row, max_col=max_col, max_row=max_row): for cell in row: cell.value = lookup[cell_group] return sheet
def __init__(self, range_string=None, min_col=None, min_row=None, max_col=None, max_row=None, title=None): if range_string is not None: try: title, (min_col, min_row, max_col, max_row) = range_to_tuple(range_string) except ValueError: min_col, min_row, max_col, max_row = range_boundaries(range_string) self.min_col = min_col self.min_row = min_row self.max_col = max_col self.max_row = max_row self.title = title if min_col > max_col: fmt = "{max_col} must be greater than {min_col}" raise ValueError(fmt.format(min_col=min_col, max_col=max_col)) if min_row > max_row: fmt = "{max_row} must be greater than {min_row}" raise ValueError(fmt.format(min_row=min_row, max_row=max_row))
def create_merged_cell_lookup(sheet) -> dict: """ :param sheet: :return: the key-value pairs (dict) of merged cell and top value e.g { 'A10:A20': 'Manufacture Of Beer (In Barrels)' 'B11:B19': 'Removals' 'C11:C14': 'Taxable*' 'C15:C18: 'Tax Free' } """ merged_lookup = {} for cell_group in sheet.merged_cells.ranges: min_col, min_row, max_col, max_row = range_boundaries(str(cell_group)) if min_col == max_col: top_left_cell_value = sheet.cell(row=min_row, column=min_col).value top_left_cell_value = top_left_cell_value.replace('\n', ' ') top_left_cell_value = " ".join( re.split("\s+", top_left_cell_value, flags=re.UNICODE)) merged_lookup[str(cell_group)] = top_left_cell_value return merged_lookup
def set_border( ws, cell_range, font=Font(name="Arial", size=12, bold=True), border_style=BORDER_MEDIUM, alignment_horizontal="center", ): # font = Font(name='Arial',size=12,bold=True) border = Border( left=Side(border_style=border_style), top=Side(border_style=border_style), bottom=Side(border_style=border_style), ) alignment = Alignment(horizontal=alignment_horizontal, vertical="center", wrap_text=True) style_border_left = Style(border, alignment) border = Border( right=Side(border_style=border_style), top=Side(border_style=border_style), bottom=Side(border_style=border_style), ) alignment = Alignment(horizontal=alignment_horizontal, vertical="center", wrap_text=True) style_border_right = Style(border, alignment) border = Border(top=Side(border_style=border_style), bottom=Side(border_style=border_style)) alignment = Alignment(horizontal=alignment_horizontal, vertical="center", wrap_text=True) style_border_middle = Style(border, alignment) # row = ws.iter_rows(cell_range) min_col, min_row, max_col, max_row = range_boundaries(cell_range.upper()) # print "TEST:",min_col, min_row, max_col, max_row for index_row, rows in enumerate(ws.iter_rows(cell_range)): # for row in rows: index_column = 0 for row in rows: # print "ROW:",index_row,index_column,row if index_column == 0: Style.setStyleRow(row, style_border_left) elif index_column == max_col - min_col: Style.setStyleRow(row, style_border_right) else: Style.setStyleRow(row, style_border_middle) index_column += 1
def __getitem__(self, key): """Convenience access by Excel style coordinates The key can be a single cell coordinate 'A1', a range of cells 'A1:D25', individual rows or columns 'A', 4 or ranges of rows or columns 'A:D', 4:10. Single cells will always be created if they do not exist. Returns either a single cell or a tuple of rows or columns. """ if isinstance(key, slice): if not all([key.start, key.stop]): raise IndexError("{0} is not a valid coordinate or range".format(key)) key = "{0}:{1}".format(key.start, key.stop) if isinstance(key, int): key = str(key ) min_col, min_row, max_col, max_row = range_boundaries(key) if not any([min_col, min_row, max_col, max_row]): raise IndexError("{0} is not a valid coordinate or range".format(key)) if not min_row: cols = tuple(self.iter_cols(min_col, max_col)) if min_col == max_col: cols = cols[0] return cols if not min_col: rows = tuple(self.iter_rows(min_col=min_col, min_row=min_row, max_col=self.max_column, max_row=max_row)) if min_row == max_row: rows = rows[0] return rows if ":" not in key: return self._get_cell(min_row, min_col) return tuple(self.iter_rows(min_row=min_row, min_col=min_col, max_row=max_row, max_col=max_col))
def computeRangeCells(self,rn, sheet): x = range_boundaries(rn) cols = list(range(min(x[0], x[2]), max(x[0], x[2]))) cols.append(max(x[0], x[2])) cols = list(map(get_column_letter, cols)) rows = list(range(min(x[1], x[3]), max(x[1], x[3]))) rows.append(max(x[1], x[3])) final = [] flat_final = [] for row in rows: slice=[] for col in cols: add_str = '{}!{}{}'.format(sheet, col, row) slice.append(add_str) flat_final.append(add_str) final.append(slice) self.prec_in_range = flat_final return final
def __init__(self, worksheet): self.fieldnames = [] column_idx = [] row_iter = worksheet.iter_rows() for cell in next(row_iter): if not cell.value: # Any blank column is beyond range of data break self.fieldnames.append(cell.value) column_idx.append(cell.column) self.column_range = (column_idx[0], column_idx[-1]) # Setup rows iter min_col, min_row, max_col, max_row = range_boundaries('{0}2:{1}{2}'.format( self.column_range[0], self.column_range[1], worksheet.max_row )) self._iter_rows = worksheet.iter_rows( min_col=min_col, min_row=min_row, max_col=max_col, max_row=max_row )
def readSheet(self, file_location): wb = openpyxl.load_workbook(filename=file_location) ws = wb.worksheets[0] merged_cells = ws.merged_cells merged_cell_ranges = copy.deepcopy(merged_cells.ranges) for ranges in merged_cell_ranges: m_col, m_row, x_col, x_row = range_boundaries(str(ranges)) cell_value = "" for row in ws.iter_rows(min_col=m_col, min_row=m_row, max_col=x_col, max_row=x_row): for cell in row: if cell.value != None: cell_value = cell.value ws.unmerge_cells(str(ranges)) for row in ws.iter_rows(min_col=m_col, min_row=m_row, max_col=x_col, max_row=x_row): for cell in row: cell.value = cell_value max_col = ws.max_column max_row = ws.max_row data = [] for row in ws.iter_rows(min_col=1, min_row=1, max_col=ws.max_column, max_row=ws.max_row): data_col = [] for cell in row: data_col.append(cell.value) data.append(data_col) return data
def __covertStringToColumnAndRow(self): # print(self.sheet.merged_cell_ranges) self.mergeCellsColumnAndRow = [ utils.range_boundaries(x) for x in self.sheet.merged_cell_ranges ]
def sheets(self): arr = [] for number, sheet in enumerate(self.book): for cell_group in sheet.merged_cells.ranges: min_col, min_row, max_col, max_row = range_boundaries(str(cell_group)) vals = set() for row in sheet.iter_rows(min_col=min_col, min_row=min_row, max_col=max_col, max_row=max_row, values_only=True): for cell in row: if cell is not None: vals.add(cell.strip()) sheet.unmerge_cells(str(cell_group)) if vals: top_left_cell_value = vals.pop() for row in sheet.iter_rows(min_col=min_col, min_row=min_row, max_col=max_col, max_row=max_row): for cell in row: cell.value = top_left_cell_value cap = [] head = True for index, row in enumerate(sheet.iter_rows(values_only=True)): index = index + 1 text = " ".join(c for c in row if c is not None) text = re_sp.sub(" ", text).strip().lower() if len(text)==0: continue c1 = (row[0] or "") c1 = c1.strip() s1 = re_sp.sub(" ", c1).strip().lower() if head and len(s1)==0: continue if head: if s1 in ("categoría", "información básica", "tipo de solución"): if len(cap)==0: continue head = False arr.append(Munch( number=number, title=cap[0], description="\n".join(cap[1:]), data=sheet, start=index, end=index+1, min_col=1 )) cap = None else: cap.extend(s.strip() for s in c1.split("\n") if s.strip()) if arr: c2 = (row[1] or "") c2 = c2.strip() s2 = re_sp.sub(" ", c2).strip().lower() if s2 == "tipo de solución": arr[-1].min_col=2 if s1 == "tipo de solución": arr[-1].start=index+1 if not head: arr[-1].end=index def myindex(sheet, arr, field, *args): if field in sheet: return for e in args: e = e.lower() if e in arr: sheet[field] = arr.index(e) return for sheet in arr: for row in sheet.data.iter_rows(min_col=sheet.min_col, min_row=sheet.start, max_row=sheet.end, values_only=True): if row[0] is not None: break sheet.start = sheet.start + 1 for row in sheet.data.iter_rows(min_col=sheet.min_col, max_row=sheet.start, values_only=True): row = [(r or "").strip().lower() for r in row] myindex(sheet, row, "ambito", "ámbito") myindex(sheet, row, "organismo", "organismo") myindex(sheet, row, "enlace", "enlace + información") myindex(sheet, row, "comun", "Servicio declarado como compartido (AGE)", "Servicio declarado como compartido") print(sheet.data.title, sheet.start, sheet.end) sheet.rows = [] tipo = None for row in sheet.data.iter_rows(min_col=sheet.min_col, min_row=sheet.start, max_row=sheet.end, values_only=True): row = [(r or "").strip() for r in row] if row[0] not in (None, ""): tipo = row[0] nombre, desc = row[1:3] nombre = nombre.rstrip(".") nombre = re_dash.sub(" - ", nombre) nombre = nombre.replace(" ", " ") urls = [u for u in re.split(r"\s+", row[sheet.enlace]) if u.startswith("http")] ambito = row[sheet.ambito] ambito = re.sub(r"[Tt]odas (las )?AAPP", "AAPP", ambito) organismo = row[sheet.organismo] if desc and not desc.endswith("."): desc = desc + "." comun = False if sheet.get("comun") is not None: comun = row[sheet.comun].lower() in ("si", "sí") sheet.rows.append(Item( tipo = tipo, nombre = nombre, ambito = ambito, organismo = organismo, urls=urls, descripcion=desc, comun=comun )) #sheet.rows=list(range(sheet.start+1, sheet.end+1)) #sheet.data.filter(row_indices=sheet.rows) #sheet.data.name_columns_by_row(sheet.start) return arr
def __covertStringToColumnAndRow(self): # print(self.sheet.merged_cell_ranges) self.mergeCellsColumnAndRow = [utils.range_boundaries(x) for x in self.sheet.merged_cell_ranges];
def boundaries(self): return range_boundaries(self.ref)
def __setMergeCellsCharAndIndex(self): beifen = [utils.range_boundaries(x) for x in self.sheet.merged_cell_ranges]; for i in beifen: t0 = utils.get_column_letter(i[0]); t2 = utils.get_column_letter(i[2]); self.mergeCellsCharAndIndex.append((t0,i[1],t2,i[3]));
def read_sheet_data(self, column_names=None, get_column_names_from_header_row=False, cell_range=None, trim=False): """Reads all the data from the active sheet. This keyword can output the sheet data in two formats: - _As a list of dictionaries_. In the case column names are supplied or obtained (see relevant parameters described below), the rows will be represented through dictionaries, of which the keys will correspond to the column names. - _As a list of lists_. If no column names are provided or obtained, each row will be read from the sheet as a list, and the returned data will, therefore, be a list of all such lists. To use column names the following two parameters can be used: - If ``column_names`` is provided it is expected to be a list which will be used to name the columns in the supplied order. - If ``get_column_names_from_header_row`` is ``True``, the column names will be read from the first row in the sheet. In this case, the first row will not be read as part of the sheet data. _NOTE_: If both parameters are supplied, the `column_names`` list will have precedence. You will get a warning in your log when this situation occurs though. Use ``cell_range`` if you want to get data from only that range in the sheet, rather than all of the data in it. The expected input form is in _A1 Notation_. For example: ``A1:B3``. If ``trim`` is ``True``, all cell values are trimmed, i.e. the surrounding whitespace is removed. Examples: | Read entire sheet with column names from header row | | | | | Open workbook | ${PROPER EXCEL FILE} | # no alias provided: defaulting to file path | | | Switch sheet | Sheet 1 (with header) | | | | @{data sheet}= | Read sheet data | get_column_names_from_header_row=${TRUE} | | | :FOR | ${row} | IN | @{data sheet} | | \ | Log list | ${row} | | | Close workbook | ${PROPER EXCEL FILE} | | | | Read sheet range without column names (trimmed) | | | | | Open workbook | ${PROPER EXCEL FILE} | first excel file | | | Switch sheet | Sheet 1 (with header) | | | | @{data sheet}= | Read sheet data | cell_range=A1:B3 | trim=${TRUE} | | :FOR | ${row} | IN | @{data sheet} | | \ | Log dictionary | ${row} | | | Close workbook | | | | For more examples check out the included test suite. """ sheet = self.active_workbook.active skip_first_row = False if get_column_names_from_header_row: if column_names: logger.warning("Both the `column_names' and " "`get_column_names_from_header_row' " "parameters were supplied. Using " "`column_names' and ignoring the other.") else: skip_first_row = True column_names = self._get_column_names_from_header_row(sheet) if cell_range: min_col, min_row, max_col, max_row = range_boundaries(cell_range) row_iterator = sheet.iter_rows(min_col=min_col, min_row=min_row, max_col=max_col, max_row=max_row) if column_names: column_names = column_names[min_col - 1:max_col] else: row_iterator = sheet.iter_rows() if skip_first_row: next(row_iterator) # Skip first row in the case of a header. if column_names: sheet_data = [] for row in row_iterator: row_data = {} for i, cell in enumerate(row): try: row_data[column_names[i]] =\ self.read_from_cell(None, cell_obj=cell, trim=trim) except IndexError: raise TooFewColumnNamesSuppliedException if not all(value is None for value in row_data.itervalues()): sheet_data.append(row_data) else: sheet_data = [] for row in row_iterator: row_data = [ self.read_from_cell(None, cell_obj=cell, trim=trim) for cell in row ] if not all(value is None for value in row_data): sheet_data.append(row_data) return sheet_data
def insert_rows(self, row_idx, cnt, above=False, copy_style=True, copy_merged_columns=True, fill_formulae=True): """Inserts new (empty) rows into worksheet at specified row index. :param row_idx: Row index specifying where to insert new rows. :param cnt: Number of rows to insert. :param above: Set True to insert rows above specified row index. :param copy_style: Set True if new rows should copy style of immediately above row. :param fill_formulae: Set True if new rows should take on formula from immediately above row, filled with references new to rows. Usage: * insert_rows(2, 10, above=True, copy_style=False) """ CELL_RE = re.compile("(?P<col>\$?[A-Z]+)(?P<row>\$?\d+)") row_idx = row_idx - 1 if above else row_idx def replace(m): row = m.group('row') prefix = "$" if row.find("$") != -1 else "" row = int(row.replace("$", "")) # CHANGED: row += cnt if row > row_idx else 0 row += cnt if row >= row_idx else 0 return m.group('col') + prefix + str(row) # First, we shift all cells down cnt rows... old_cells = set() old_fas = set() new_cells = dict() new_fas = dict() for c in self._cells.values(): old_coor = c.coordinate # Shift all references to anything below row_idx if c.data_type == Cell.TYPE_FORMULA: c.value = CELL_RE.sub(replace, c.value) # Here, we need to properly update the formula references to reflect new row indices if old_coor in self.formula_attributes and 'ref' in self.formula_attributes[ old_coor]: self.formula_attributes[old_coor]['ref'] = CELL_RE.sub( replace, self.formula_attributes[old_coor]['ref']) # Do the magic to set up our actual shift if c.row > row_idx: old_coor = c.coordinate old_cells.add((c.row, c.col_idx)) c.row += cnt new_cells[(c.row, c.col_idx)] = c if old_coor in self.formula_attributes: old_fas.add(old_coor) fa = self.formula_attributes[old_coor].copy() new_fas[c.coordinate] = fa for coor in old_cells: del self._cells[coor] self._cells.update(new_cells) for fa in old_fas: del self.formula_attributes[fa] self.formula_attributes.update(new_fas) # Next, we need to shift all the Row Dimensions below our new rows down by cnt... # CHANGED: for row in range(len(self.row_dimensions) - 1 + cnt, row_idx + cnt, -1): Cause: "self.row_dimensions" is a dict, and it can start from other number than 1 for row in range(list(self.row_dimensions)[-1] + cnt, row_idx + cnt, -1): new_rd = copy.copy(self.row_dimensions[row - cnt]) new_rd.index = row self.row_dimensions[row] = new_rd del self.row_dimensions[row - cnt] # Now, create our new rows, with all the pretty cells row_idx += 1 for row in range(row_idx, row_idx + cnt): # Create a Row Dimension for our new row new_rd = copy.copy(self.row_dimensions[row - 1]) new_rd.index = row self.row_dimensions[row] = new_rd # CHANGED: for col in range(1,self.max_column): for col in range(self.max_column): # CHANGED: col = get_column_letter(col) col = get_column_letter(col + 1) cell = self.cell('%s%d' % (col, row)) cell.value = None source = self.cell('%s%d' % (col, row - 1)) if copy_style: #print("Copying style from cell %s%d to %s%d" % (col, orig_row, col, row)) cell.number_format = source.number_format cell.font = source.font.copy() cell.alignment = source.alignment.copy() cell.border = source.border.copy() cell.fill = source.fill.copy() if fill_formulae and source.data_type == Cell.TYPE_FORMULA: s_coor = source.coordinate if s_coor in self.formula_attributes and 'ref' not in self.formula_attributes[ s_coor]: fa = self.formula_attributes[s_coor].copy() self.formula_attributes[cell.coordinate] = fa #print("Copying formula from cell %s%d to %s%d"%(col,row-1,col,row)) cell.value = re.sub("(\$?[A-Z]{1,3}\$?)%d" % (row - 1), lambda m: m.group(1) + str(row), source.value) cell.data_type = Cell.TYPE_FORMULA # Check for Merged Cell Ranges that need to be expanded to contain new cells for cr_idx, cr in enumerate(self.merged_cell_ranges): self.merged_cell_ranges[cr_idx] = CELL_RE.sub(replace, cr) # Merge columns of the new rows in the same way row above does if copy_merged_columns: for cr in self.merged_cell_ranges: min_col, min_row, max_col, max_row = range_boundaries(cr) if max_row == min_row == row_idx - 1: for row in range(row_idx, row_idx + cnt): newCellRange = get_column_letter(min_col) + str( row) + ":" + get_column_letter(max_col) + str(row) self.merge_cells(newCellRange)
@author: rudolf.kenyeres """ import openpyxl, os from openpyxl.utils import range_boundaries import pyjokes excelFiles = [] for filename in os.listdir('.'): if filename.endswith('.xlsx'): excelFiles.append(filename) excelFiles.sort(key=str.lower) #Intitializes workbooks print('Searching data files...') min_col, min_row, max_col, max_row = range_boundaries("A:CA") keyWord = [ item for item in input( "Enter the examined keywords, separate by commas : ").split(",") ] KeywordIndex = [ int(item) for item in input( "Enter the examined column index, which should be containing the keyword(s): " ).split(",") ] minimum = int(input("Enter the minimum value: ")) MinimumIndex = [ int(item) for item in input( "Enter the examined column index, which should be greather than the minimum: " ).split(",")
def exportCSV(self, sheetName, outFname, rangeString=None, colNames=0, maxrows=99999999, encoding='utf-8', stripFields=True, fPRINT=False): logger = self._SetLogger(__name__) if fPRINT: print("Converting sheetName: [{0}] to CSV file: [{1}]." .format(sheetName, outFname)) ws = self._wb.get_sheet_by_name(sheetName) nRows = ws.max_row nCols = ws.max_column # print (type(rangeString), rangeString) if rangeString: minCol, minRow, maxCol, maxRow = xls.range_boundaries(rangeString) else: minCol, minRow, maxCol, maxRow = 1, 1, ws.max_column, ws.max_row fullRange = get_column_letter(minCol) + str(minRow) + ':' + get_column_letter(maxCol) + str(maxRow) logger.info(" full Range: {0}".format(fullRange)) minCol -= 1 # col parte da '0' maxCol -= 1 # col parte da '0' # --------------------------------- # - grosso modo può andare..... # --------------------------------- dataList = [] # dataListOfList = [] for indexRow, row in enumerate(ws.rows): # - prendiamo tutte le righe previste nel range if minRow <= indexRow < maxRow: # - ...e lo stesso per le colonne if indexRow >= colNames: line = [] for indexCol, cell in enumerate(row): if minCol <= indexCol <= maxCol: val = cell.value if cell.value else '' # if stripFields and isinstance(val, str): val=val.strip() line.append(val) else: continue # costruiamo la riga ... con i valori delle celle appena lette lineStr = line[0] for item in line[1:]: if isinstance(item, str) and stripFields: lineStr = '{0};{1}'.format(lineStr, item.strip()) else: lineStr = '{0};{1}'.format(lineStr, item) # ... per inserirla nell'array dataList.append(lineStr) if self._fDEBUG: for index, line in enumerate(dataList): print ('{0:5} - {1}'.format(index, line)) if outFname: FILE = open(outFname, "w", encoding=encoding) for line in dataList: line = "{0}{1}".format(line, '\n') FILE.write(line) # con Python3 bisogna convertirlo in bytes FILE.close() logger.info("..... file: {FILE} has been written".format(FILE=outFname)) if fPRINT: print("..... file: {FILE} has been written".format(FILE=outFname)) if self._fDEBUG: print() print(" full Range: {0}".format(fullRange)) print(" file {0} has been created".format(outFname)) print() for item in dataList: print (item) print () self.data = dataList