def emit(self): # resolve the range into cells sheet = self.cell and self.cell.sheet or '' if '!' in self.value: sheet = '' try: addr_str = self.value.replace('$', '') address = AddressRange.create(addr_str, sheet=sheet, cell=self.cell) except ValueError: # check for table relative address table_name = None if self.cell: excel = self.cell.excel if excel and '[' in addr_str: table_name = excel.table_name_containing(self.cell.address) if not table_name: logging.getLogger('pycel').warning( 'Table Name not found: {}'.format(addr_str)) return '"{}"'.format(NAME_ERROR) addr_str = '{}{}'.format(table_name, addr_str) address = AddressRange.create( addr_str, sheet=self.cell.address.sheet, cell=self.cell) template = '_R_("{}")' if address.is_range else '_C_("{}")' return template.format(address)
def test_address_range(): a = AddressRange('a1:b2') b = AddressRange('A1:B2') c = AddressRange(a) assert a == b assert b == c assert b == AddressRange(b) assert b == AddressRange.create(b) assert AddressRange('sh!a1:b2') == AddressRange(a, sheet='sh') assert AddressCell('C13') == AddressCell('R13C3') with pytest.raises(ValueError): AddressRange(AddressRange('sh!a1:b2'), sheet='sheet') a = AddressRange('A:A') assert 'A' == a.start.column assert 'A' == a.end.column assert 0 == a.start.row assert 0 == a.end.row b = AddressRange('1:1') assert '' == b.start.column assert '' == b.end.column assert 1 == b.start.row assert 1 == b.end.row
def evaluate(self, address): """ evaluate a cell or cells in the spreadsheet :param address: str, AddressRange, AddressCell or a tuple or list or iterable of these three :return: evaluated value/values """ if str(address) not in self.cell_map: if list_like(address): if not isinstance(address, (tuple, list)): address = tuple(address) # process a tuple or list of addresses return type(address)(self.evaluate(c) for c in address) address = AddressRange.create(address) # get the sheet if not specified if not address.has_sheet: address = AddressRange( address, sheet=self.excel.get_active_sheet_name()) if address.address not in self.cell_map: self._gen_graph(address.address) result = self._evaluate(str(address)) if isinstance(result, tuple): # trim excess dimensions if len(result[0]) == 1: result = tuple(row[0] for row in result) if len(result) == 1: result = result[0] return result
def test_row(address, expected): try: address = AddressRange.create(address) except ValueError: pass result = row(address) if expected is None: assert 1 == next(iter(result)) else: assert expected == result
def set_value(self, address, value, set_as_range=False): """ Set the value of one or more cells or ranges :param address: `str`, `AddressRange`, `AddressCell` or a tuple, list or an iterable of these three :param value: value to set. This can be a value or a tuple/list which matches the shapes needed for the given address/addresses :param set_as_range: With a single range address and a list like value, set to true to set the entire rnage to the inserted list. """ if list_like(value) and not set_as_range: value = tuple(flatten(value)) if list_like(address): address = (AddressCell(addr) for addr in flatten(address)) else: address = flatten(AddressRange(address).resolve_range) address = tuple(address) assert len(address) == len(value) for addr, val in zip(address, value): self.set_value(addr, val) return elif address not in self.cell_map: address = AddressRange.create(address).address assert address in self.cell_map if set_as_range and list_like(value) and not ( value and list_like(value[0])): value = (value, ) cell_or_range = self.cell_map[address] if cell_or_range.value != value: # pragma: no branch # need to be able to 'set' an empty cell if cell_or_range.value is None: cell_or_range.value = value # reset the node + its dependencies self._reset(cell_or_range) # set the value cell_or_range.value = value
def connect(self): self.workbook = load_workbook(self.filename) self.workbook_dataonly = load_workbook(self.filename, data_only=True, read_only=True) # expand array formulas for ws in self.workbook: for address, props in ws.formula_attributes.items(): if props.get('t') != 'array': continue # pragma: no cover # get the reference address for the array formula ref_addr = AddressRange(props.get('ref')) if isinstance(ref_addr, AddressRange): formula = ws[address].value for i, row in enumerate(ref_addr.rows, start=1): for j, addr in enumerate(row, start=1): ws[addr.coordinate] = ARRAY_FORMULA_FORMAT % ( formula[1:], i, j, *ref_addr.size)
def test_trim_cells_range(excel): excel_compiler = ExcelCompiler(excel=excel) input_addrs = [AddressRange('trim-range!D4:E4')] output_addrs = ['trim-range!B2'] old_value = excel_compiler.evaluate(output_addrs[0]) excel_compiler.trim_graph(input_addrs, output_addrs) excel_compiler._to_text() excel_compiler = ExcelCompiler._from_text(excel_compiler.filename) assert old_value == excel_compiler.evaluate(output_addrs[0]) excel_compiler.set_value(input_addrs[0], [5, 6]) assert old_value - 1 == excel_compiler.evaluate(output_addrs[0]) excel_compiler.set_value(input_addrs[0], [4, 6]) assert old_value - 2 == excel_compiler.evaluate(output_addrs[0]) excel_compiler.set_value(tuple(next(input_addrs[0].rows)), [5, 6]) assert old_value - 1 == excel_compiler.evaluate(output_addrs[0])
def get_range(self, address): if not address.is_bounded_range: if not address.is_range: return self._get_cell(address) else: # this is a unbounded range to range mapping, disassemble cell = self._get_cell(address) formula = cell.formula assert formula.startswith(REF_START) assert formula.endswith(REF_END) ref_addr = formula[len(REF_START):-len(REF_END)] return self.get_range(AddressRange(ref_addr)) # need to map col or row ranges to a specific range addresses = address.resolve_range cells = [[self._get_cell(addr) for addr in row] for row in addresses] values = [[c.values for c in row] for row in cells] return ExcelOpxWrapper.RangeData(address, None, values)
def _to_text(self, filename=None, is_json=False): """Serialize to a json/yaml file""" extra_data = {} if self.extra_data is None else self.extra_data def cell_value(a_cell): if a_cell.formula and a_cell.formula.python_code: return '=' + a_cell.formula.python_code else: return a_cell.value extra_data.update(dict( excel_hash=self._excel_file_md5_digest, cell_map=dict(sorted( ((addr, cell_value(cell)) for addr, cell in self.cell_map.items() if cell.serialize), key=lambda x: AddressRange(x[0]).sort_key )), )) if not filename: filename = self.filename + ('.json' if is_json else '.yml') # hash the current file to see if this function makes any changes existing_hash = (self._compute_file_md5_digest(filename) if os.path.exists(filename) else None) if not is_json: with open(filename, 'w') as f: ymlo = YAML() ymlo.width = 120 ymlo.dump(extra_data, f) else: with open(filename, 'w') as f: json.dump(extra_data, f, indent=4) del extra_data['cell_map'] # hash the newfile, return True if it changed, this is only reliable # on pythons which have ordered dict (CPython 3.6 & python 3.7+) return (existing_hash is None or existing_hash != self._compute_file_md5_digest(filename))
def connect(self): self.workbook = load_workbook(self.filename) self.workbook_dataonly = load_workbook(self.filename, data_only=True, read_only=True) # expand array formulas for ws in self.workbook: for address, props in ws.formula_attributes.items(): if props.get('t') != 'array': continue # pragma: no cover # get the reference address for the array formula ref_addr = AddressRange(props.get('ref')) if isinstance(ref_addr, AddressRange): # Single cell array formulas can be ignored formula = ws[address].value for i, row in enumerate(ref_addr.rows, start=1): for j, addr in enumerate(row, start=1): ws[addr.coordinate] = ARRAY_FORMULA_FORMAT % ( formula[1:], i, j, *ref_addr.size) # ::HACK:: this is only needed because openpyxl does not define # iter_cols for read only workbooks def _iter_cols(self, min_col=None, max_col=None, min_row=None, max_row=None, values_only=False): # In the case of lookup for something like C:D, openpyxl # attempts to use iter_cols() which is not defined for read_only yield from zip(*self.iter_rows(min_col=min_col, max_col=max_col)) import types for sheet in self.workbook_dataonly: sheet.iter_cols = types.MethodType(_iter_cols, sheet)
def _from_text(cls, filename, is_json=False): """deserialize from a json/yaml file""" if not is_json: if not filename.split('.')[-1].startswith('y'): filename += '.yml' else: if not filename.endswith('.json'): # pragma: no branch filename += '.json' with open(filename, 'r') as f: data = YAML().load(f) excel = _CompiledImporter(filename) excel_compiler = cls(excel=excel) excel.compiler = excel_compiler for address, python_code in data['cell_map'].items(): lineno = data['cell_map'].lc.data[address][0] + 1 address = AddressRange(address) excel.value = python_code excel_compiler._make_cells(address) formula = excel_compiler.cell_map[address.address].formula if formula is not None: formula.lineno = lineno formula.filename = filename excel_compiler._process_gen_graph() del data['cell_map'] excel_compiler._excel_file_md5_digest = data['excel_hash'] del data['excel_hash'] excel_compiler.extra_data = data excel_compiler.excel = None return excel_compiler
def conditional_format(self, address): """ Return the conditional formats applicable for this cell """ address = AddressCell(address) all_formats = self.workbook[address.sheet].conditional_formatting formats = (cf for cf in all_formats if address.coordinate in cf) rules = [] for cf in formats: origin = AddressRange(cf.cells.ranges[0].coord).start row_offset = address.row - origin.row col_offset = address.col_idx - origin.col_idx for rule in cf.rules: if rule.formula: trans = Translator( '={}'.format(rule.formula[0]), origin.coordinate) formula = trans.translate_formula( row_delta=row_offset, col_delta=col_offset) rules.append(self.CfRule( formula=formula, priority=rule.priority, dxf_id=rule.dxfId, dxf=rule.dxf, stop_if_true=rule.stopIfTrue, )) return sorted(rules, key=lambda x: x.priority)
u'In Dusseldorf', u'My-Sheet', u"Demande d'autorisation", "1sheet", ".sheet", '"', ]) def test_quoted_address(sheet_name): addr = AddressCell('A2', sheet=sheet_name) assert addr.quoted_address == '{}!A2'.format(addr.quote_sheet(sheet_name)) @pytest.mark.parametrize('address, expected', ( ('s!D2', 's!$D$2'), ('s!D2:F4', 's!$D$2:$F$4'), (AddressRange("D2:F4", sheet='sh 1'), "'sh 1'!$D$2:$F$4"), )) def test_address_absolute(address, expected): assert AddressRange.create(address).abs_address == expected def test_split_sheetname(): assert ('', 'B1') == split_sheetname('B1') assert ('sheet', 'B1') == split_sheetname('sheet!B1') assert ('', 'B1:C2') == split_sheetname('B1:C2') assert ('sheet', 'B1:C2') == split_sheetname('sheet!B1:C2') assert ('sheet', 'B1:C2') == split_sheetname('sheet!B1:C2') assert ('SheetA', 'A1:A9') == split_sheetname("SheetA!A1:'SheetA'!A9")
def get_formula_from_range(self, address): addr = AddressRange.create(address) found = addr.column in self.columns and str(addr.row) in self.rows return '=linest()' if found else ''
def test_cell_range_repr(excel): cell_range = _CellRange( ExcelWrapper.RangeData(AddressRange('sheet!A1:B1'), '', ((0, 0), ))) assert 'sheet!A1:B1' == repr(cell_range)
def test_evaluate_entire_row_column(excel_compiler): value = excel_compiler.evaluate(AddressRange('Sheet1!A:A')) expected = excel_compiler.evaluate(AddressRange('Sheet1!A1:A18')) assert value == expected assert len(value) == 18 assert not list_like(value[0]) value = excel_compiler.evaluate(AddressRange('Sheet1!1:1')) expected = excel_compiler.evaluate(AddressRange('Sheet1!A1:D1')) assert value == expected assert len(value) == 4 assert not list_like(value[0]) value = excel_compiler.evaluate(AddressRange('Sheet1!A:B')) expected = excel_compiler.evaluate(AddressRange('Sheet1!A1:B18')) assert value == expected assert len(value) == 18 assert len(value[0]) == 2 value = excel_compiler.evaluate(AddressRange('Sheet1!1:2')) expected = excel_compiler.evaluate(AddressRange('Sheet1!A1:D2')) assert value == expected assert len(value) == 2 assert len(value[0]) == 4 # now from the text based file excel_compiler._to_text() text_excel_compiler = ExcelCompiler._from_text(excel_compiler.filename) value = text_excel_compiler.evaluate(AddressRange('Sheet1!A:A')) expected = text_excel_compiler.evaluate(AddressRange('Sheet1!A1:A18')) assert value == expected assert len(value) == 18 assert not list_like(value[0]) value = text_excel_compiler.evaluate(AddressRange('Sheet1!1:1')) expected = text_excel_compiler.evaluate(AddressRange('Sheet1!A1:D1')) assert value == expected assert len(value) == 4 assert not list_like(value[0]) value = text_excel_compiler.evaluate(AddressRange('Sheet1!A:B')) expected = text_excel_compiler.evaluate(AddressRange('Sheet1!A1:B18')) assert len(value) == 18 assert len(value[0]) == 2 assert value == expected value = text_excel_compiler.evaluate(AddressRange('Sheet1!1:2')) expected = text_excel_compiler.evaluate(AddressRange('Sheet1!A1:D2')) assert value == expected assert len(value) == 2 assert len(value[0]) == 4
def test_address_range_errors(): with pytest.raises(ValueError): AddressRange('B32:B')
def test_is_range(): assert AddressRange('a1:b2').is_range assert not AddressRange('a1').is_range
def get_formula_from_range(self, address): addr = AddressRange.create(address) found = addr.column in self.columns and str(addr.row) in self.rows return '=linest()' if found else ''
def test_address_range_or(left, right, result): assert AddressRange(left) | AddressRange(right) == AddressRange(result)
assert ((0, 1),) == find_corresponding_index((list('ABC'), ), 'B') assert ((0, 1), (0, 2)) == find_corresponding_index((list('ABB'), ), 'B') assert ((0, 1), (0, 2)) == find_corresponding_index((list('ABB'), ), '<>A') assert () == find_corresponding_index((list('ABB'), ), 'D') with pytest.raises(TypeError): find_corresponding_index('ABB', '<B') with pytest.raises(ValueError): find_corresponding_index((list('ABB'), ), None) @pytest.mark.parametrize( 'value, expected', ( ('xyzzy', False), (AddressRange('A1:B2'), False), (AddressCell('A1'), False), ([1, 2], True), ((1, 2), True), ({1: 2, 3: 4}, True), ((a for a in range(2)), True), ) ) def test_list_like(value, expected): assert list_like(value) == expected if expected: assert_list_like(value) else: with pytest.raises(TypeError, match='Must be a list like: '): assert_list_like(value)
def trim_graph(self, input_addrs, output_addrs): """Remove unneeded cells from the graph""" input_addrs = tuple(AddressRange(addr).address for addr in input_addrs) output_addrs = tuple(AddressRange(addr) for addr in output_addrs) # 1) build graph for all needed outputs self._gen_graph(output_addrs) # 2) walk the dependant tree (from the inputs) and find needed cells needed_cells = set() def walk_dependents(cell): """passed in a _Cell or _CellRange""" for child_cell in self.dep_graph.successors(cell): child_addr = child_cell.address.address if child_addr not in needed_cells: needed_cells.add(child_addr) walk_dependents(child_cell) missing_dependants = set() for addr in input_addrs: try: if addr in self.cell_map: walk_dependents(self.cell_map[addr]) msg = '' else: msg = 'warning', f'Address {addr} not found in cell_map' except nx.exception.NetworkXError as exc: if AddressRange(addr) not in output_addrs: msg = 'error', f'{exc}: which usually means no outputs are dependant on it.' else: msg = 'warning', str(exc) if msg: missing_dependants.add((addr, *msg)) if missing_dependants: for addr, level, msg in missing_dependants: getattr(self.log, level)(f"Input address {addr}: {msg}") if any(m[1] != 'warning' for m in missing_dependants): raise ValueError('\n' + '\n'.join( map(str, sorted(missing_dependants, key=lambda x: x[2])))) # even unconnected output addresses are needed for addr in output_addrs: needed_cells.add(addr.address) # 3) walk the precedent tree (from the output) and trim unneeded cells processed_cells = set() def walk_precedents(cell): for child_address in (a.address for a in cell.needed_addresses): if child_address not in processed_cells: # pragma: no branch processed_cells.add(child_address) child_cell = self.cell_map[child_address] if child_address in needed_cells or ':' in child_address: walk_precedents(child_cell) else: # trim this cell, now we will need only its value needed_cells.add(child_address) child_cell.formula = None self.log.debug(f'Trimming {child_address}') for addr in output_addrs: walk_precedents(self.cell_map[addr.address]) # 4) check for any buried (not leaf node) inputs for addr in input_addrs: cell = self.cell_map.get(addr) if cell and getattr(cell, 'formula', None): self.log.info(f"{addr} is not a leaf node") # 5) remove unneeded cells cells_to_remove = tuple(addr for addr in self.cell_map if addr not in needed_cells) for addr in cells_to_remove: del self.cell_map[addr]
def test_array_formulas(excel, address, values, formulas): result = excel.get_range(address) assert result.address == AddressRange(address) assert result.values == values assert result.formulas == formulas
('1.1', 1.1), ('xyzzy', VALUE_ERROR), )) def test_math_wrap(value, result): assert apply_meta(excel_math_func(lambda x: x), name_space={})[0](value) == result def test_math_wrap_domain_error(): func = apply_meta(excel_math_func(lambda x: math.log(x)), name_space={})[0] assert func(-1) == NUM_ERROR @pytest.mark.parametrize('value, result', ( ((1, 2, 3), (1, 2, 3)), ((AddressRange('A1:B1'), ) * 3, ('R:A1:B1', AddressRange('A1:B1'), 'R:A1:B1')), ((AddressCell('A1'), ) * 3, ('C:A1', AddressCell('A1'), 'C:A1')), )) def test_ref_wrap(value, result): def r_test(*args): return args name_space = locals() name_space['_R_'] = lambda a: f'R:{a}' name_space['_C_'] = lambda a: f'C:{a}' func = apply_meta(excel_helper(ref_params=1)(r_test), name_space=name_space)[0] assert func(*value) == result
def trim_graph(self, input_addrs, output_addrs): """Remove unneeded cells from the graph""" input_addrs = tuple(AddressRange(addr).address for addr in input_addrs) output_addrs = tuple(AddressRange(addr) for addr in output_addrs) # 1) build graph for all needed outputs self._gen_graph(output_addrs) # 2) walk the dependant tree (from the inputs) and find needed cells needed_cells = set() def walk_dependents(cell): """passed in a _Cell or _CellRange""" for child_cell in self.dep_graph.successors(cell): child_addr = child_cell.address.address if child_addr not in needed_cells: needed_cells.add(child_addr) walk_dependents(child_cell) try: for addr in input_addrs: if addr in self.cell_map: walk_dependents(self.cell_map[addr]) else: self.log.warning( 'Address {} not found in cell_map'.format(addr)) except nx.exception.NetworkXError as exc: if AddressRange(addr) not in output_addrs: raise ValueError('{}: which usually means no outputs ' 'are dependant on it.'.format(exc)) # even unconnected output addresses are needed for addr in output_addrs: needed_cells.add(addr.address) # 3) walk the precedent tree (from the output) and trim unneeded cells processed_cells = set() def walk_precedents(cell): for child_address in (a.address for a in cell.needed_addresses): if child_address not in processed_cells: # pragma: no branch processed_cells.add(child_address) child_cell = self.cell_map[child_address] if child_address in needed_cells or ':' in child_address: walk_precedents(child_cell) else: # trim this cell, now we will need only its value needed_cells.add(child_address) child_cell.formula = None self.log.debug('Trimming {}'.format(child_address)) for addr in output_addrs: walk_precedents(self.cell_map[addr.address]) # 4) check for any buried (not leaf node) inputs for addr in input_addrs: cell = self.cell_map.get(addr) if cell and getattr(cell, 'formula', None): self.log.info("{} is not a leaf node".format(addr)) # 5) remove unneeded cells cells_to_remove = tuple(addr for addr in self.cell_map if addr not in needed_cells) for addr in cells_to_remove: del self.cell_map[addr]
def test_address_range_multi_colon(address, expected): a_range = AddressRange(address) assert a_range == AddressRange(expected)
def test_address_range_and(left, right, result): assert AddressRange(left) & AddressRange(right) == AddressRange(result)
def test_address_range_intersection(left, right, result): result = AddressRange(result) assert AddressRange(left) & AddressRange(right) == result assert AddressRange(left) & right == result assert left & AddressRange(right) == result
def test_address_range_contains(a_range, address, expected): a_range = AddressRange(a_range) assert expected == (address in a_range) address = AddressCell(address) assert expected == (address in a_range)
def test_address_range_union(left, right, result): result = AddressRange(result) assert AddressRange(left)**AddressRange(right) == result assert AddressRange(left)**right == result assert left**AddressRange(right) == result
def test_address_range(): a = AddressRange('a1:b2') b = AddressRange('A1:B2') c = AddressRange(a) assert a == b assert b == c assert b == AddressRange(b) assert b == AddressRange.create(b) assert AddressRange('sh!a1:b2') == AddressRange(a, sheet='sh') assert AddressCell('C13') == AddressCell('R13C3') with pytest.raises(ValueError): AddressRange(AddressRange('sh!a1:b2'), sheet='sheet') a = AddressRange('A:A') assert 'A' == a.start.column assert 'A' == a.end.column assert 0 == a.start.row assert 0 == a.end.row b = AddressRange('1:1') assert '' == b.start.column assert '' == b.end.column assert 1 == b.start.row assert 1 == b.end.row
def test_address_absolute(address, expected): assert AddressRange.create(address).abs_address == expected
def test_array_formula_context_fit_to_range(address, value, result): if address is not None: address = AddressRange(address, sheet='s') with in_array_formula_context(address): assert in_array_formula_context.fit_to_range(value) == result
def test_multi_area_range(address, string, mar): assert address == tuple(mar.resolve_range) assert not mar.is_unbounded_range assert address[0][0] in mar assert AddressRange('Z99') not in mar assert str(mar) == string
def test_column(address, result): try: address = AddressRange.create(address) except ValueError: pass assert result == column(address)
def resolve_range(self): return AddressRange( (self.address.start.col_idx, self.address.start.row, self.address.start.col_idx + len(self.values[0]) - 1, self.address.start.row + len(self.values) - 1), sheet=self.address.sheet).resolve_range