def emit(self, ast, context=None, pointer=False):
    """Build the code string that stands for this reference node.

    Depending on where the node sits in the formula, the result is either
    the quoted address/name itself or that quoted text wrapped in a call
    expression (``self.eval_ref(...)`` or ``resolve_range(...)``).

    Args:
        ast: Formula tree; passed to ``self.parent`` and ``children`` to
            locate this node's parent and siblings.
        context: Optional sheet name. When truthy, ``context + "!"`` is
            prefixed to addresses that carry no sheet of their own.
        pointer: When truthy, the quoted address is returned as is, without
            any evaluation call around it.

    Returns:
        The generated string, or ``self.tvalue`` unchanged when it is an
        ``ExcelError``.
    """
    if isinstance(self.tvalue, ExcelError):
        if self.debug:
            print('WARNING: Excel Error Code found', self.tvalue)
        return self.tvalue

    is_a_named_range = self.tsubtype == "named_range"

    # --- 1) quoted name/address of the reference -------------------------
    if is_a_named_range:
        my_str = '"%s"' % self.token.tvalue
    else:
        rng = self.tvalue.replace('$', '')
        sheet = context + "!" if context else ""
        is_a_range = is_range(rng)

        if self.tsubtype == 'pointer':
            my_str = '"' + rng + '"'
        else:
            if is_a_range:
                sh, _start, _end = split_range(rng)
            else:
                try:
                    sh, _col, _row = split_address(rng)
                except Exception:
                    # Best effort: an unparsable address is treated as
                    # having no sheet instead of aborting the emission.
                    if self.debug:
                        print('WARNING: Unknown address: %s is not a cell/range reference, nor a named range' % to_str(rng))
                    sh = None

            # Only prepend the context sheet when the address has none.
            if sh:
                my_str = '"' + rng + '"'
            else:
                my_str = '"' + sheet + rng + '"'

    # --- 2) position of the node inside its parent ------------------------
    parent = self.parent(ast)
    if parent is not None:
        parent_func = parent.tvalue
        siblings = parent.children(ast)
    else:
        parent_func = None
        siblings = []

    # Some formulas use the address itself and not its content: ":", the
    # first argument of OFFSET, and named ranges listed as CHOOSE choices.
    uses_address = (
        parent is not None
        and (
            parent_func == ':'
            or (parent_func == 'OFFSET' and siblings[0] == self)
            or (parent_func == 'CHOOSE' and siblings[0] != self and is_a_named_range)
        )
    ) or pointer
    if uses_address:
        return my_str

    # INDEX HANDLER (first argument): eval_ref is not used here to avoid
    # empty cells (which are not included in Ranges).
    if parent_func == 'INDEX' and siblings[0] == self:
        if is_a_named_range:
            return 'resolve_range(self.named_ranges[%s])' % my_str
        return 'resolve_range(%s)' % my_str

    # Argument positions that always get the "ref" form, even when the node
    # sits below a special / indirect function.
    if parent_func == 'OFFSET':
        forces_ref = is_a_named_range and (
            siblings[1] == self or siblings[2] == self)
    elif parent_func == 'INDEX':
        forces_ref = is_a_named_range and (
            siblings[1] == self
            or (len(siblings) == 3 and siblings[2] == self))
    elif parent_func == 'MATCH':
        forces_ref = (
            siblings[0] == self
            or (len(siblings) == 3 and siblings[2] == self))
    else:
        forces_ref = False

    if not forces_ref and (self.find_special_function(ast)
                           or self.has_ind_func_parent(ast)):
        return 'self.eval_ref(%s)' % my_str

    return 'self.eval_ref(%s, ref = %s)' % (my_str, to_str(self.ref))
def emit(self, ast, context=None, pointer=False):
    """Build the code string that stands for this reference node.

    Depending on where the node sits in the formula, the result is either
    the quoted address/name itself or that quoted text wrapped in a call
    expression (``self.eval_ref(...)`` or ``resolve_range(...)``).

    Args:
        ast: Formula tree; passed to ``self.parent`` and ``children`` to
            locate this node's parent and siblings.
        context: Optional sheet name. When truthy, ``context + "!"`` is
            prefixed to addresses that carry no sheet of their own.
        pointer: When truthy, the quoted address is returned as is, without
            any evaluation call around it.

    Returns:
        The generated string, or ``self.tvalue`` unchanged when it is an
        ``ExcelError``.
    """
    if isinstance(self.tvalue, ExcelError):
        if self.debug:
            print('WARNING: Excel Error Code found', self.tvalue)
        return self.tvalue

    is_a_named_range = self.tsubtype == "named_range"

    # --- 1) quoted name/address of the reference -------------------------
    if is_a_named_range:
        my_str = '"%s"' % self.token.tvalue
    else:
        rng = self.tvalue.replace('$', '')
        sheet = context + "!" if context else ""
        is_a_range = is_range(rng)

        if self.tsubtype == 'pointer':
            my_str = '"' + rng + '"'
        else:
            if is_a_range:
                sh, _start, _end = split_range(rng)
            else:
                try:
                    sh, _col, _row = split_address(rng)
                except Exception:
                    # Best effort: an unparsable address is treated as
                    # having no sheet instead of aborting the emission.
                    if self.debug:
                        print('WARNING: Unknown address: %s is not a cell/range reference, nor a named range' % to_str(rng))
                    sh = None

            # Only prepend the context sheet when the address has none.
            if sh:
                my_str = '"' + rng + '"'
            else:
                my_str = '"' + sheet + rng + '"'

    # --- 2) position of the node inside its parent ------------------------
    parent = self.parent(ast)
    if parent is not None:
        parent_func = parent.tvalue
        siblings = parent.children(ast)
    else:
        parent_func = None
        siblings = []

    # Some formulas use the address itself and not its content: ":", the
    # first argument of OFFSET, and named ranges listed as CHOOSE choices.
    uses_address = (
        parent is not None
        and (
            parent_func == ':'
            or (parent_func == 'OFFSET' and siblings[0] == self)
            or (parent_func == 'CHOOSE' and siblings[0] != self and is_a_named_range)
        )
    ) or pointer
    if uses_address:
        return my_str

    # INDEX HANDLER (first argument): eval_ref is not used here to avoid
    # empty cells (which are not included in Ranges).
    if parent_func == 'INDEX' and siblings[0] == self:
        if is_a_named_range:
            return 'resolve_range(self.named_ranges[%s])' % my_str
        return 'resolve_range(%s)' % my_str

    # Argument positions that always get the "ref" form, even when the node
    # sits below a special / indirect function.
    if parent_func == 'OFFSET':
        forces_ref = is_a_named_range and (
            siblings[1] == self or siblings[2] == self)
    elif parent_func == 'INDEX':
        forces_ref = is_a_named_range and (
            siblings[1] == self
            or (len(siblings) == 3 and siblings[2] == self))
    elif parent_func == 'MATCH':
        forces_ref = (
            siblings[0] == self
            or (len(siblings) == 3 and siblings[2] == self))
    else:
        forces_ref = False

    if not forces_ref and (self.find_special_function(ast)
                           or self.has_ind_func_parent(ast)):
        return 'self.eval_ref(%s)' % my_str

    return 'self.eval_ref(%s, ref = %s)' % (my_str, to_str(self.ref))
def graph_from_seeds(seeds, cell_source):
    """Create or update the dependency graph reachable from ``seeds``.

    The graph is created when ``cell_source.excel_compiler`` is truthy
    (cell_source is an ExcelCompiler): a fresh ``networkx.DiGraph`` and a
    fresh cellmap are built from the seeds. Otherwise (cell_source is a
    Spreadsheet) ``cell_source.cellmap`` and ``cell_source.G`` are updated
    in place.

    Args:
        seeds: Cells to start from. Only seeds with a truthy ``formula`` are
            analysed for dependencies.
        cell_source: Object providing ``excel_compiler``, ``cellmap``,
            ``named_ranges``, ``pointers``, ``range()`` and, on the update
            path, ``G``.

    Returns:
        The tuple ``(cellmap, G)``.

    Side effects:
        Sets ``python_expression`` on and calls ``compile()`` for the cells it
        processes, adds addresses to ``cell_source.pointers`` and may add
        empty cells to ``cell_source.cellmap``.
    """
    if cell_source.excel_compiler:
        # Called from an ExcelCompiler: build cellmap and graph from seeds.
        cellmap = {seed.address(): seed for seed in seeds}
        cells = cell_source.cellmap
        G = networkx.DiGraph()
        for c in cellmap.values():
            G.add_node(c)
    else:
        # Called from a Spreadsheet: reuse its cellmap and graph.
        cellmap = cell_source.cellmap
        cells = cellmap
        G = cell_source.G
        for c in seeds:
            G.add_node(c)
            cellmap[c.address()] = c

    # cells to analyze: only formulas
    todo = [s for s in seeds if s.formula]
    steps = list(range(len(todo)))
    names = cell_source.named_ranges

    while todo:
        c1 = todo.pop()
        step = steps.pop()
        cursheet = c1.sheet

        ###### 1) looking for cell c1 dependencies ####################
        pystr, ast = cell2code(c1, names)
        # Only the expression is stored now; compilation is done at the end
        # of the iteration.
        c1.python_expression = pystr.replace('"', "'")

        # pointers names are already treated in ExcelCompiler
        if (('OFFSET' in c1.formula or 'INDEX' in c1.formula)
                and c1.address() not in names):
            cell_source.pointers.add(c1.address())

        # all the cells/ranges this formula refers to, without dupes
        deps = uniqueify([x for x in ast.nodes() if isinstance(x, RangeNode)])

        ###### 2) connect dependencies in cells in graph ####################
        for dep in deps:
            dep_name = dep.tvalue.replace('$', '')

            # this is to avoid :A1 or A1: dep due to clean_pointers()
            # returning an ExcelError
            if dep_name.startswith(':') or dep_name.endswith(':'):
                dep_name = dep_name.replace(':', '')

            # if not pointer, we need an absolute address
            if (dep.tsubtype != 'pointer' and dep_name not in names
                    and "!" not in dep_name and cursheet is not None):
                dep_name = cursheet + "!" + dep_name

            if dep_name in cellmap:
                # Named ranges + ranges already parsed (previous iterations)
                origins = [cellmap[dep_name]]
                target = cellmap[c1.address()]

            elif is_range(dep_name) or (dep_name in names
                                        and is_range(names[dep_name])):
                # dep_name is a multi-cell range: create a range object
                reference = names[dep_name] if dep_name in names else dep_name

                if 'OFFSET' in reference or 'INDEX' in reference:
                    start_end = prepare_pointer(reference, names, ref_cell=c1)
                    rng = cell_source.range(start_end)

                    if dep_name in names:
                        # dep is a pointer range
                        address = dep_name
                    elif c1.address() in names:
                        # c1 holds a pointer range
                        address = c1.address()
                    else:
                        # a pointer range with no name: its address will be
                        # its name
                        address = '%s:%s' % (start_end["start"],
                                             start_end["end"])
                        cell_source.pointers.add(address)
                else:
                    address = dep_name

                    # addresses in this range that are not yet in the graph
                    range_addresses = list(
                        resolve_range(reference, should_flatten=True)[0])
                    missing_addresses = [addr for addr in range_addresses
                                         if addr not in cellmap]

                    if missing_addresses:
                        # row and col dimensions for the sheet, assuming the
                        # whole range is in one sheet
                        sheet_initial = split_address(missing_addresses[0])[0]
                        max_rows, max_cols = max_dimension(cellmap,
                                                           sheet_initial)

                        # create empty cells that aren't in the cellmap
                        for addr in missing_addresses:
                            sheet_new, col_new, row_new = split_address(addr)

                            # if somehow a new sheet comes up in the range,
                            # get the new dimensions
                            if sheet_new != sheet_initial:
                                sheet_initial = sheet_new
                                max_rows, max_cols = max_dimension(cellmap,
                                                                   sheet_new)

                            # only add cells within the maximum bounds of the
                            # sheet to avoid too many evaluations for A:A or
                            # 1:1 ranges
                            if (int(row_new) <= max_rows
                                    and int(col2num(col_new)) <= max_cols):
                                cell_new = Cell(addr, sheet_new, value="",
                                                should_eval='False')
                                cellmap[addr] = cell_new
                                G.add_node(cell_new)
                                # also registered on the cell_source, which
                                # is used in this function
                                cell_source.cellmap[addr] = cell_new

                    rng = cell_source.range(reference)

                if address in cellmap:
                    virtual_cell = cellmap[address]
                else:
                    virtual_cell = Cell(address, None, value=rng,
                                        formula=reference, is_range=True,
                                        is_named_range=True)
                    # save the range
                    cellmap[address] = virtual_cell

                # Cell(A1:A10) -> c1 or Cell(ExampleName) -> c1
                G.add_node(virtual_cell)
                G.add_edge(virtual_cell, c1)

                # cells in the range should point to the range as their parent
                target = virtual_cell
                origins = []

                # could be better, but can't check on Exception types here...
                if list(rng.keys()):
                    for child in rng.addresses:
                        if child not in cellmap:
                            origins.append(cells[child])
                        else:
                            origins.append(cellmap[child])

            else:
                # not a range
                reference = names[dep_name] if dep_name in names else dep_name

                if reference in cells:
                    if dep_name in names:
                        virtual_cell = Cell(dep_name, None,
                                            value=cells[reference].value,
                                            formula=reference, is_range=False,
                                            is_named_range=True)
                        G.add_node(virtual_cell)
                        G.add_edge(cells[reference], virtual_cell)
                        origins = [virtual_cell]
                    else:
                        origins = [cells[reference]]

                    cell = origins[0]
                    if cell.formula is not None and (
                            'OFFSET' in cell.formula
                            or 'INDEX' in cell.formula):
                        cell_source.pointers.add(cell.address())
                else:
                    virtual_cell = Cell(dep_name, None, value=None,
                                        formula=None, is_range=False,
                                        is_named_range=True)
                    origins = [virtual_cell]

                target = c1

            # process each cell
            for c2 in flatten(origins):
                # if we haven't treated this cell already
                if c2.address() not in cellmap:
                    if c2.formula:
                        # cell with a formula: goes on the todo list
                        todo.append(c2)
                        steps.append(step + 1)
                    else:
                        # constant cell: no further processing, just set and
                        # compile its code
                        constant_code, _constant_ast = cell2code(c2, names)
                        c2.python_expression = constant_code
                        c2.compile()

                    cellmap[c2.address()] = c2
                    G.add_node(c2)

                # add an edge from the cell to the parent (range or cell)
                if target != []:
                    G.add_edge(c2, target)

        # cell compilation is done here because pointer ranges might update
        # python_expressions
        c1.compile()

    return (cellmap, G)