Exemplo n.º 1
0
    def cell_add(self, cell, value=None):
        """
        Adds a cell to the Spreadsheet. Either the cell argument can be specified, or any combination of the other
        arguments.

        :param address: the address of the cell
        :param cell: a Cell object to add
        :param value: (optional) a new value for the cell. In this case, the first argument cell is processed as
                      an address.
        :param formula:
        """
        if type(cell) != Cell:
            cell = Cell(cell,
                        None,
                        value=value,
                        formula=None,
                        is_range=False,
                        is_named_range=False)

        addr = cell.address()
        if addr in self.cellmap:
            raise Exception('Cell %s already in cellmap' % addr)

        cellmap, G = graph_from_seeds([cell], self)

        self.cellmap = cellmap
        self.G = G

        print(
            "Graph construction updated, %s nodes, %s edges, %s cellmap entries"
            % (len(G.nodes()), len(G.edges()), len(cellmap)))
Exemplo n.º 2
0
    def cell_set_value(self, address, val):
        """
        Set the value of a cell

        :param address: the address of a cell
        :param value: the new value
        """
        self.reset_buffer = set()

        try:
            address = address.replace('$', '')
            address = address.replace("'", '')
            cell = self.cellmap[address]

            # when you set a value on cell, its should_eval flag is set to 'never' so its formula is not used until set free again => sp.activate_formula()
            self.cell_fix(address)

            # case where the address refers to a range
            if cell.is_range:
                cells_to_set = []

                if not isinstance(val, list):
                    val = [val] * len(cells_to_set)

                self.cell_reset(cell)
                cell.range.values = val

            # case where the address refers to a single value
            else:
                if address in self.named_ranges:  # if the cell is a named range, we need to update and fix the reference cell
                    ref_address = self.named_ranges[address]

                    if ref_address in self.cellmap:
                        ref_cell = self.cellmap[ref_address]
                    else:
                        ref_cell = Cell(ref_address,
                                        None,
                                        value=val,
                                        formula=None,
                                        is_range=False,
                                        is_named_range=False)
                        self.cell_add(ref_cell)

                    ref_cell.value = val

                if cell.value != val:
                    if cell.value is None:
                        cell.value = 'notNone'  # hack to avoid the direct return in reset() when value is None
                    # reset the node + its dependencies
                    self.cell_reset(cell)
                    # set the value
                    cell.value = val

            for vol in self.pointers_to_reset:  # reset all pointers
                self.cell_reset(self.cellmap[vol])
        except KeyError:
            raise Exception('Cell %s not in cellmap' % address)
Exemplo n.º 3
0
    def set_value(self, address, val):

        self.reset_buffer = set()

        if address not in self.cellmap:
            raise Exception("Address not present in graph.")

        address = address.replace('$', '')
        cell = self.cellmap[address]

        # when you set a value on cell, its should_eval flag is set to 'never' so its formula is not used until set free again => sp.activate_formula()
        self.fix_cell(address)

        # case where the address refers to a range
        if self.cellmap[address].is_range:
            cells_to_set = []
            # for a in self.cellmap[address].range.addresses:
            # if a in self.cellmap:
            #     cells_to_set.append(self.cellmap[a])
            #     self.fix_cell(a)

            if type(val) != list:
                val = [val] * len(cells_to_set)

            self.reset(cell)
            cell.range.values = val

        # case where the address refers to a single value
        else:
            if address in self.named_ranges:  # if the cell is a named range, we need to update and fix the reference cell
                ref_address = self.named_ranges[address]

                if ref_address in self.cellmap:
                    ref_cell = self.cellmap[ref_address]
                else:
                    ref_cell = Cell(ref_address,
                                    None,
                                    value=val,
                                    formula=None,
                                    is_range=False,
                                    is_named_range=False)
                    self.add_cell(ref_cell)

                # self.fix_cell(ref_address)
                ref_cell.value = val

            if cell.value != val:
                if cell.value is None:
                    cell.value = 'notNone'  # hack to avoid the direct return in reset() when value is None
                # reset the node + its dependencies
                self.reset(cell)
                # set the value
                cell.value = val

        for vol in self.pointers_to_reset:  # reset all pointers
            self.reset(self.cellmap[vol])
Exemplo n.º 4
0
 def cell_from_dict(d):
     cell_is_range = type(d["value"]) == dict
     if cell_is_range:
         range = d["value"]
         if len(range["values"]) == 0:
             range["values"] = [None] * len(range["cells"])
         value = RangeCore(range["cells"], range["values"], nrows = range["nrows"], ncols = range["ncols"])
     else:
         value = d["value"]
     new_cell = Cell(d["address"], None, value=value, formula=d["formula"], is_range = cell_is_range, is_named_range=d["is_named_range"], should_eval=d["should_eval"])
     new_cell.python_expression = d["python_expression"]
     new_cell.compile()
     return {"id": new_cell}
Exemplo n.º 5
0
 def cell_from_dict(d):
     cell_is_range = type(d["value"]) == dict
     if cell_is_range:
         range = d["value"]
         if len(range["values"]) == 0:
             range["values"] = [None] * len(range["cells"])
         value = RangeCore(range["cells"], range["values"], nrows = range["nrows"], ncols = range["ncols"])
     else:
         value = d["value"]
     new_cell = Cell(d["address"], None, value=value, formula=d["formula"], is_range = cell_is_range, is_named_range=d["is_named_range"], should_eval=d["should_eval"])
     new_cell.python_expression = d["python_expression"]
     new_cell.compile()
     return {"id": new_cell}
Exemplo n.º 6
0
    def set_value(self, address, val):

        self.reset_buffer = set()

        if address not in self.cellmap:
            raise Exception("Address not present in graph.")

        address = address.replace('$','')
        cell = self.cellmap[address]

        # when you set a value on cell, its should_eval flag is set to 'never' so its formula is not used until set free again => sp.activate_formula()
        self.fix_cell(address)

        # case where the address refers to a range
        if self.cellmap[address].is_range: 
            cells_to_set = []
            # for a in self.cellmap[address].range.addresses:
                # if a in self.cellmap:
                #     cells_to_set.append(self.cellmap[a])
                #     self.fix_cell(a)

            if type(val) != list:
                val = [val]*len(cells_to_set)

            self.reset(cell)
            cell.range.values = val

        # case where the address refers to a single value
        else:
            if address in self.named_ranges: # if the cell is a named range, we need to update and fix the reference cell
                ref_address = self.named_ranges[address]
                
                if ref_address in self.cellmap:
                    ref_cell = self.cellmap[ref_address]
                else:
                    ref_cell = Cell(ref_address, None, value = val, formula = None, is_range = False, is_named_range = False )
                    self.add_cell(ref_cell)

                # self.fix_cell(ref_address)
                ref_cell.value = val

            if cell.value != val:
                if cell.value is None:
                    cell.value = 'notNone' # hack to avoid the direct return in reset() when value is None
                # reset the node + its dependencies
                self.reset(cell)
                # set the value
                cell.value = val

        for vol_range in self.volatiles: # reset all volatiles
            self.reset(self.cellmap[vol_range])
Exemplo n.º 7
0
    def add_cell(self, cell, value = None):
        
        if type(cell) != Cell:
            cell = Cell(cell, None, value = value, formula = None, is_range = False, is_named_range = False)
        
        addr = cell.address()
        if addr in self.cellmap:
            raise Exception('Cell %s already in cellmap' % addr)

        cellmap, G = graph_from_seeds([cell], self)

        self.cellmap = cellmap
        self.G = G

        print "Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))
Exemplo n.º 8
0
    def add_cell(self, cell, value=None):

        if type(cell) != Cell:
            cell = Cell(cell,
                        None,
                        value=value,
                        formula=None,
                        is_range=False,
                        is_named_range=False)

        addr = cell.address()
        if addr in self.cellmap:
            raise Exception('Cell %s already in cellmap' % addr)

        cellmap, G = graph_from_seeds([cell], self)

        self.cellmap = cellmap
        self.G = G

        print "Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (
            len(G.nodes()), len(G.edges()), len(cellmap))
Exemplo n.º 9
0
def graph_from_seeds(seeds, cell_source):
    """
    This creates/updates a networkx graph from a list of cells.

    The graph is created when the cell_source is an instance of ExcelCompiler
    The graph is updated when the cell_source is an instance of Spreadsheet
    """

    # when called from Spreadsheet instance, use the Spreadsheet cellmap and graph
    if hasattr(cell_source, 'G'):  # ~ cell_source is a Spreadsheet
        cellmap = cell_source.cellmap
        cells = cellmap
        G = cell_source.G
        for c in seeds:
            G.add_node(c)
            cellmap[c.address()] = c
    # when called from ExcelCompiler instance, construct cellmap and graph from seeds
    else:  # ~ cell_source is a ExcelCompiler
        cellmap = dict([(x.address(), x) for x in seeds])
        cells = cell_source.cells
        # directed graph
        G = networkx.DiGraph()
        # match the info in cellmap
        for c in cellmap.values():
            G.add_node(c)

    # cells to analyze: only formulas
    todo = [s for s in seeds if s.formula]
    steps = [i for i, s in enumerate(todo)]
    names = cell_source.named_ranges

    while todo:
        c1 = todo.pop()
        step = steps.pop()
        cursheet = c1.sheet

        ###### 1) looking for cell c1 dependencies ####################
        # print 'C1', c1.address()
        # in case a formula, get all cells that are arguments
        pystr, ast = cell2code(c1, names)
        # set the code & compile it (will flag problems sooner rather than later)
        c1.python_expression = pystr.replace('"',
                                             "'")  # compilation is done later

        if 'OFFSET' in c1.formula or 'INDEX' in c1.formula:
            if c1.address(
            ) not in cell_source.named_ranges:  # pointers names already treated in ExcelCompiler
                cell_source.pointers.add(c1.address())

        # get all the cells/ranges this formula refers to
        deps = [x for x in ast.nodes() if isinstance(x, RangeNode)]
        # remove dupes
        deps = uniqueify(deps)

        ###### 2) connect dependencies in cells in graph ####################

        # ### LOG
        # tmp = []
        # for dep in deps:
        #     if dep not in names:
        #         if "!" not in dep and cursheet != None:
        #             dep = cursheet + "!" + dep
        #     if dep not in cellmap:
        #         tmp.append(dep)
        # #deps = tmp
        # logStep = "%s %s = %s " % ('|'*step, c1.address(), '',)
        # print logStep

        # if len(deps) > 1 and 'L' in deps[0] and deps[0] == deps[-1].replace('DG','L'):
        #     print logStep, "[%s...%s]" % (deps[0], deps[-1])
        # elif len(deps) > 0:
        #     print logStep, "->", deps
        # else:
        #     print logStep, "done"

        for dep in deps:
            dep_name = dep.tvalue.replace('$', '')

            # this is to avoid :A1 or A1: dep due to clean_pointers() returning an ExcelError
            if dep_name.startswith(':') or dep_name.endswith(':'):
                dep_name = dep_name.replace(':', '')

            # if not pointer, we need an absolute address
            if dep.tsubtype != 'pointer' and dep_name not in names and "!" not in dep_name and cursheet != None:
                dep_name = cursheet + "!" + dep_name

            # Named_ranges + ranges already parsed (previous iterations)
            if dep_name in cellmap:
                origins = [cellmap[dep_name]]
                target = cellmap[c1.address()]
            # if the dep_name is a multi-cell range, create a range object
            elif is_range(dep_name) or (dep_name in names
                                        and is_range(names[dep_name])):
                if dep_name in names:
                    reference = names[dep_name]
                else:
                    reference = dep_name

                if 'OFFSET' in reference or 'INDEX' in reference:
                    start_end = prepare_pointer(reference, names, ref_cell=c1)
                    rng = cell_source.range(start_end)

                    if dep_name in names:  # dep is a pointer range
                        address = dep_name
                    else:
                        if c1.address(
                        ) in names:  # c1 holds is a pointer range
                            address = c1.address()
                        else:  # a pointer range with no name, its address will be its name
                            address = '%s:%s' % (start_end["start"],
                                                 start_end["end"])
                            cell_source.pointers.add(address)
                else:
                    address = dep_name

                    # get a list of the addresses in this range that are not yet in the graph
                    range_addresses = list(
                        resolve_range(reference, should_flatten=True)[0])
                    cellmap_add_addresses = [
                        addr for addr in range_addresses
                        if addr not in cellmap.keys()
                    ]

                    if len(cellmap_add_addresses) > 0:
                        # this means there are cells to be added

                        # get row and col dimensions for the sheet, assuming the whole range is in one sheet
                        sheet_initial = split_address(
                            cellmap_add_addresses[0])[0]
                        max_rows, max_cols = max_dimension(
                            cellmap, sheet_initial)

                        # create empty cells that aren't in the cellmap
                        for addr in cellmap_add_addresses:
                            sheet_new, col_new, row_new = split_address(addr)

                            # if somehow a new sheet comes up in the range, get the new dimensions
                            if sheet_new != sheet_initial:
                                sheet_initial = sheet_new
                                max_rows, max_cols = max_dimension(
                                    cellmap, sheet_new)

                            # add the empty cells
                            if int(row_new) <= max_rows and int(
                                    col2num(col_new)) <= max_cols:
                                # only add cells within the maximum bounds of the sheet to avoid too many evaluations
                                # for A:A or 1:1 ranges

                                cell_new = Cell(addr,
                                                sheet_new,
                                                value="",
                                                should_eval='False'
                                                )  # create new cell object
                                cellmap[
                                    addr] = cell_new  # add it to the cellmap
                                G.add_node(cell_new)  # add it to the graph
                                cell_source.cells[
                                    addr] = cell_new  # add it to the cell_source, used in this function

                    rng = cell_source.range(reference)

                if address in cellmap:
                    virtual_cell = cellmap[address]
                else:
                    virtual_cell = Cell(address,
                                        None,
                                        value=rng,
                                        formula=reference,
                                        is_range=True,
                                        is_named_range=True)
                    # save the range
                    cellmap[address] = virtual_cell

                # add an edge from the range to the parent
                G.add_node(virtual_cell)
                # Cell(A1:A10) -> c1 or Cell(ExampleName) -> c1
                G.add_edge(virtual_cell, c1)
                # cells in the range should point to the range as their parent
                target = virtual_cell
                origins = []

                if len(
                        list(rng.keys())
                ) != 0:  # could be better, but can't check on Exception types here...
                    for child in rng.addresses:
                        if child not in cellmap:
                            origins.append(cells[child])
                        else:
                            origins.append(cellmap[child])
            else:
                # not a range
                if dep_name in names:
                    reference = names[dep_name]
                else:
                    reference = dep_name

                if reference in cells:
                    if dep_name in names:
                        virtual_cell = Cell(dep_name,
                                            None,
                                            value=cells[reference].value,
                                            formula=reference,
                                            is_range=False,
                                            is_named_range=True)

                        G.add_node(virtual_cell)
                        G.add_edge(cells[reference], virtual_cell)

                        origins = [virtual_cell]
                    else:
                        cell = cells[reference]
                        origins = [cell]

                    cell = origins[0]

                    if cell.formula is not None and ('OFFSET' in cell.formula
                                                     or 'INDEX'
                                                     in cell.formula):
                        cell_source.pointers.add(cell.address())
                else:
                    virtual_cell = Cell(dep_name,
                                        None,
                                        value=None,
                                        formula=None,
                                        is_range=False,
                                        is_named_range=True)
                    origins = [virtual_cell]

                target = c1

            # process each cell
            for c2 in flatten(origins):

                # if we havent treated this cell allready
                if c2.address() not in cellmap:
                    if c2.formula:
                        # cell with a formula, needs to be added to the todo list
                        todo.append(c2)
                        steps.append(step + 1)
                    else:
                        # constant cell, no need for further processing, just remember to set the code
                        pystr, ast = cell2code(c2, names)
                        c2.python_expression = pystr
                        c2.compile()

                    # save in the cellmap
                    cellmap[c2.address()] = c2
                    # add to the graph
                    G.add_node(c2)

                # add an edge from the cell to the parent (range or cell)
                if (target != []):
                    # print "Adding edge %s --> %s" % (c2.address(), target.address())
                    G.add_edge(c2, target)

        c1.compile(
        )  # cell compilation is done here because pointer ranges might update python_expressions

    return (cellmap, G)
Exemplo n.º 10
0
def load(fname):

    def clean_bool(string):
        if string == "0":
            return None
        else:
            return string

    def to_bool(string):
        if string == "1" or string == "True":
            return True
        elif string == "0" or string == "False":
            return False
        else:
            return string
    def to_float(string):
        if string == "None":
            return None
        try:
            return float(string)
        except:
            return string

    mode = "node0"
    nodes = []
    edges = []
    volatiles = set()
    outputs = None
    inputs = None
    named_ranges = {}
    infile = gzip.GzipFile(fname, 'r')

    for line in infile.read().splitlines():

        if line == "====":
            mode = "node0"
            continue
        if line == "-----":
            cellmap_temp = {n.address(): n for n in nodes}
            Range = RangeFactory(cellmap_temp)
            mode = "node0"
            continue
        elif line == "edges":
            cellmap = {n.address(): n for n in nodes}
            mode = "edges"
            continue
        elif line == "outputs":
            mode = "outputs"
            continue
        elif line == "inputs":
            mode = "inputs"
            continue
        elif line == "named_ranges":
            mode = "named_ranges"
            continue

        if mode == "node0":
            [address, formula, python_expression, is_range, is_named_range, is_volatile, should_eval] = line.split(SEP)
            formula = clean_bool(formula)
            python_expression = clean_bool(python_expression)
            is_range = to_bool(is_range)
            is_named_range = to_bool(is_named_range)
            is_volatile = to_bool(is_volatile)
            should_eval = should_eval
            mode = "node1"
        elif mode == "node1":
            if is_range:

                reference = json.loads(line) if is_volatile else line # in order to be able to parse dicts
                vv = Range(reference)

                if is_volatile:
                    if not is_named_range:
                        address = vv.name

                    volatiles.add(address)

                cell = Cell(address, None, vv, formula, is_range, is_named_range, should_eval)
                cell.python_expression = python_expression
                nodes.append(cell)
            else:
                value = to_bool(to_float(line))
                
                cell = Cell(address, None, value, formula, is_range, is_named_range, should_eval)
                
                cell.python_expression = python_expression
                if formula:
                    if 'OFFSET' in formula or 'INDEX' in formula:
                        volatiles.add(address)


                    cell.compile()               
                nodes.append(cell)
        elif mode == "edges":
            source, target = line.split(SEP)
            edges.append((cellmap[source], cellmap[target]))
        elif mode == "outputs":
            outputs = line.split(SEP)
        elif mode == "inputs":
            inputs = line.split(SEP)
        elif mode == "named_ranges":
            k,v = line.split(SEP)
            named_ranges[k] = v

    G = DiGraph(data = edges)

    print "Graph loading done, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))

    return (G, cellmap, named_ranges, volatiles, outputs, inputs)
Exemplo n.º 11
0
    def clean_pointer(self):
        print '___### Cleaning Pointers ###___'

        new_named_ranges = self.named_ranges.copy()
        new_cells = self.cellmap.copy()

        ### 1) create ranges
        for n in self.named_ranges:
            reference = self.named_ranges[n]
            if is_range(reference):
                if 'OFFSET' not in reference:
                    my_range = self.Range(reference)
                    self.cellmap[n] = Cell(n,
                                           None,
                                           value=my_range,
                                           formula=reference,
                                           is_range=True,
                                           is_named_range=True)
                else:
                    self.cellmap[n] = Cell(n,
                                           None,
                                           value=None,
                                           formula=reference,
                                           is_range=False,
                                           is_named_range=True)
            else:
                if reference in self.cellmap:
                    self.cellmap[n] = Cell(n,
                                           None,
                                           value=self.cellmap[reference].value,
                                           formula=reference,
                                           is_range=False,
                                           is_named_range=True)
                else:
                    self.cellmap[n] = Cell(n,
                                           None,
                                           value=None,
                                           formula=reference,
                                           is_range=False,
                                           is_named_range=True)

        ### 2) gather all occurence of pointer functions in cells or named_range
        all_pointers = set()

        for pointer_name in self.pointer_to_remove:
            for k, v in self.named_ranges.items():
                if pointer_name in v:
                    all_pointers.add((v, k, None))
            for k, cell in self.cellmap.items():
                if cell.formula and pointer_name in cell.formula:
                    all_pointers.add(
                        (cell.formula, cell.address(), cell.sheet))

            # print "%s %s to parse" % (str(len(all_pointers)), pointer_name)

        ### 3) evaluate all pointers

        for formula, address, sheet in all_pointers:

            if sheet:
                parsed = parse_cell_address(address)
            else:
                parsed = ""
            e = shunting_yard(formula,
                              self.named_ranges,
                              ref=parsed,
                              tokenize_range=True)
            ast, root = build_ast(e)
            code = root.emit(ast)

            cell = {"formula": formula, "address": address, "sheet": sheet}

            replacements = self.eval_pointers_from_ast(ast, root, cell)

            new_formula = formula
            if type(replacements) == list:
                for repl in replacements:
                    if type(repl["value"]) == ExcelError:
                        if self.debug:
                            print 'WARNING: Excel error found => replacing with #N/A'
                        repl["value"] = "#N/A"

                    if repl["expression_type"] == "value":
                        new_formula = new_formula.replace(
                            repl["formula"], str(repl["value"]))
                    else:
                        new_formula = new_formula.replace(
                            repl["formula"], repl["value"])
            else:
                new_formula = None

            if address in new_named_ranges:
                new_named_ranges[address] = new_formula
            else:
                old_cell = self.cellmap[address]
                new_cells[address] = Cell(
                    old_cell.address(),
                    old_cell.sheet,
                    value=old_cell.value,
                    formula=new_formula,
                    is_range=old_cell.is_range,
                    is_named_range=old_cell.is_named_range,
                    should_eval=old_cell.should_eval)

        return new_cells, new_named_ranges
Exemplo n.º 12
0
    def prune_graph(self):
        print '___### Pruning Graph ###___'

        G = self.G

        # get all the cells impacted by inputs
        dependencies = set()
        for input_address in self.inputs:
            child = self.cellmap[input_address]
            if child == None:
                print "Not found ", input_address
                continue
            g = make_subgraph(G, child, "descending")
            dependencies = dependencies.union(g.nodes())

        # print "%s cells depending on inputs" % str(len(dependencies))

        # prune the graph and set all cell independent of input to const
        subgraph = networkx.DiGraph()
        new_cellmap = {}
        for output_address in self.outputs:
            new_cellmap[output_address] = self.cellmap[output_address]
            seed = self.cellmap[output_address]
            todo = map(lambda n: (seed, n), G.predecessors(seed))
            done = set(todo)

            while len(todo) > 0:
                current, pred = todo.pop()
                # print "==========================="
                # print current.address(), pred.address()
                if current in dependencies:
                    if pred in dependencies or isinstance(
                            pred.value, RangeCore) or pred.is_named_range:
                        subgraph.add_edge(pred, current)
                        new_cellmap[pred.address()] = pred
                        new_cellmap[current.address()] = current

                        nexts = G.predecessors(pred)
                        for n in nexts:
                            if (pred, n) not in done:
                                todo += [(pred, n)]
                                done.add((pred, n))
                    else:
                        if pred.address() not in new_cellmap:
                            const_node = Cell(
                                pred.address(),
                                pred.sheet,
                                value=pred.range
                                if pred.is_range else pred.value,
                                formula=None,
                                is_range=isinstance(pred.range, RangeCore),
                                is_named_range=pred.is_named_range,
                                should_eval=pred.should_eval)
                            # pystr,ast = cell2code(self.named_ranges, const_node, pred.sheet)
                            # const_node.python_expression = pystr
                            # const_node.compile()
                            new_cellmap[pred.address()] = const_node

                        const_node = new_cellmap[pred.address()]
                        subgraph.add_edge(const_node, current)

                else:
                    # case of range independant of input, we add all children as const
                    if pred.address() not in new_cellmap:
                        const_node = Cell(
                            pred.address(),
                            pred.sheet,
                            value=pred.range if pred.is_range else pred.value,
                            formula=None,
                            is_range=pred.is_range,
                            is_named_range=pred.is_named_range,
                            should_eval=pred.should_eval)
                        # pystr,ast = cell2code(self.named_ranges, const_node, pred.sheet)
                        # const_node.python_expression = pystr
                        # const_node.compile()
                        new_cellmap[pred.address()] = const_node

                    const_node = new_cellmap[pred.address()]
                    subgraph.add_edge(const_node, current)

        print "Graph pruning done, %s nodes, %s edges, %s cellmap entries" % (
            len(subgraph.nodes()), len(subgraph.edges()), len(new_cellmap))
        undirected = networkx.Graph(subgraph)
        # print "Number of connected components %s", str(number_connected_components(undirected))
        # print map(lambda x: x.address(), subgraph.nodes())

        # add back inputs that have been pruned because they are outside of calculation chain
        for i in self.inputs:
            if i not in new_cellmap:
                if i in self.named_ranges:
                    reference = self.named_ranges[i]
                    if is_range(reference):

                        rng = self.Range(reference)
                        virtual_cell = Cell(i,
                                            None,
                                            value=rng,
                                            formula=reference,
                                            is_range=True,
                                            is_named_range=True)
                        new_cellmap[i] = virtual_cell
                        subgraph.add_node(
                            virtual_cell
                        )  # edges are not needed here since the input here is not in the calculation chain

                    else:
                        # might need to be changed to actual self.cells Cell, not a copy
                        virtual_cell = Cell(
                            i,
                            None,
                            value=self.cellmap[reference].value,
                            formula=reference,
                            is_range=False,
                            is_named_range=True)
                        new_cellmap[i] = virtual_cell
                        subgraph.add_node(
                            virtual_cell
                        )  # edges are not needed here since the input here is not in the calculation chain
                else:
                    if is_range(i):
                        rng = self.Range(i)
                        virtual_cell = Cell(i,
                                            None,
                                            value=rng,
                                            formula=o,
                                            is_range=True,
                                            is_named_range=True)
                        new_cellmap[i] = virtual_cell
                        subgraph.add_node(
                            virtual_cell
                        )  # edges are not needed here since the input here is not in the calculation chain
                    else:
                        new_cellmap[i] = self.cellmap[i]
                        subgraph.add_node(
                            self.cellmap[i]
                        )  # edges are not needed here since the input here is not in the calculation chain

        return Spreadsheet(subgraph,
                           new_cellmap,
                           self.named_ranges,
                           self.pointers,
                           self.outputs,
                           self.inputs,
                           debug=self.debug)
Exemplo n.º 13
0
 def test_Modify_graph(self):
     self.sp.add_cell(Cell('Sheet1!A4', formula='A1 + 10'))
     self.sp.set_value('Sheet1!A1', 3)
     self.assertEqual(self.sp.evaluate('Sheet1!A4'), 13)
Exemplo n.º 14
0
def read_cells(archive, ignore_sheets=[], ignore_hidden=False):
    global debug

    print('___### Reading Cells from XLSX ###___')

    cells = {}

    functions = set()

    cts = dict(read_content_types(archive))

    strings_path = cts.get(
        SHARED_STRINGS
    )  # source: https://bitbucket.org/openpyxl/openpyxl/src/93604327bce7aac5e8270674579af76d390e09c0/openpyxl/reader/excel.py?at=default&fileviewer=file-view-default
    if strings_path is not None:
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']

        function_map = {}

        if sheet_name in ignore_sheets: continue

        root = fromstring(
            archive.read(sheet['path'])
        )  # it is necessary to use cElementTree from xml module, otherwise root.findall doesn't work as it should

        hidden_cols = False
        nb_hidden = 0

        if ignore_hidden:
            hidden_col_min = None
            hidden_col_max = None

            for col in root.findall('.//{%s}cols/*' % SHEET_MAIN_NS):
                if 'hidden' in col.attrib and col.attrib['hidden'] == '1':
                    hidden_cols = True
                    hidden_col_min = int(col.attrib['min'])
                    hidden_col_max = int(col.attrib['max'])

        for c in root.findall('.//{%s}c/*/..' % SHEET_MAIN_NS):
            cell_data_type = c.get('t',
                                   'n')  # if no type assigned, assign 'number'
            cell_address = c.attrib['r']

            skip = False

            if hidden_cols:
                found = re.search(CELL_REF_RE, cell_address)
                col = col2num(found.group(1))

                if col >= hidden_col_min and col <= hidden_col_max:
                    nb_hidden += 1
                    skip = True

            if not skip:
                cell = {
                    'a': '%s!%s' % (sheet_name, cell_address),
                    'f': None,
                    'v': None
                }
                if debug: print('Cell', cell['a'])
                for child in c:
                    child_data_type = child.get(
                        't', 'n')  # if no type assigned, assign 'number'

                    if child.tag == '{%s}f' % SHEET_MAIN_NS:
                        if 'ref' in child.attrib:  # the first cell of a shared formula has a 'ref' attribute
                            if debug:
                                print(
                                    '*** Found definition of shared formula ***',
                                    child.text, child.attrib['ref'])
                            if "si" in child.attrib:
                                function_map[child.attrib['si']] = (
                                    child.attrib['ref'],
                                    Translator(str('=' + child.text),
                                               cell_address)
                                )  # translator of openpyxl needs a unicode argument that starts with '='
                            # else:
                            #     print "Encountered cell with ref but not si: ", sheet_name, child.attrib['ref']
                        if child_data_type == 'shared':
                            if debug:
                                print(
                                    '*** Found child %s of shared formula %s ***'
                                    % (cell_address, child.attrib['si']))

                            ref = function_map[child.attrib['si']][0]
                            formula = function_map[child.attrib['si']][1]

                            translated = formula.translate_formula(
                                cell_address)
                            cell['f'] = translated[
                                1:]  # we need to get rid of the '='

                        else:
                            cell['f'] = child.text

                    elif child.tag == '{%s}v' % SHEET_MAIN_NS:
                        if cell_data_type == 's' or cell_data_type == 'str':  # value is a string
                            try:  # if it fails, it means that cell content is a string calculated from a formula
                                cell['v'] = shared_strings[int(child.text)]
                            except:
                                cell['v'] = child.text
                        elif cell_data_type == 'b':
                            cell['v'] = bool(int(child.text))
                        elif cell_data_type == 'n':
                            cell['v'] = _cast_number(child.text)

                    elif child.text is None:
                        continue

                if cell['f'] is not None:

                    pattern = re.compile(r"([A-Z][A-Z0-9]*)\(")
                    found = re.findall(pattern, cell['f'])

                    map(lambda x: functions.add(x), found)

                if cell['f'] is not None or cell['v'] is not None:
                    should_eval = 'always' if cell[
                        'f'] is not None and 'OFFSET' in cell['f'] else 'normal'

                    # cleaned_formula = cell['f']
                    cleaned_formula = cell['f'].replace(
                        ", ", ",") if cell['f'] is not None else None
                    if "!" in cell_address:
                        cells[cell_address] = Cell(cell_address,
                                                   sheet_name,
                                                   value=cell['v'],
                                                   formula=cleaned_formula,
                                                   should_eval=should_eval)
                    else:
                        cells[sheet_name + "!" + cell_address] = Cell(
                            cell_address,
                            sheet_name,
                            value=cell['v'],
                            formula=cleaned_formula,
                            should_eval=should_eval)

        if nb_hidden > 0:
            print('Ignored %i hidden cells in sheet %s' %
                  (nb_hidden, sheet_name))

    print('Nb of different functions %i' % len(functions))
    print(functions)

    for f in functions:
        if f not in existing:
            print('== Missing function: %s' % f)

    return cells
Exemplo n.º 15
0
def load(fname):
    def clean_bool(string):
        if string == "0":
            return None
        else:
            return string

    def to_bool(string):
        if string == "1" or string == "True":
            return True
        elif string == "0" or string == "False":
            return False
        else:
            return string

    def to_float(string):
        if string == "None":
            return None
        try:
            return float(string)
        except:
            return string

    mode = "node0"
    nodes = []
    edges = []
    pointers = set()
    outputs = None
    inputs = None
    named_ranges = {}
    infile = gzip.GzipFile(fname, 'r')

    for line in infile.read().splitlines():

        if line == "====":
            mode = "node0"
            continue
        if line == "-----":
            cellmap_temp = {n.address(): n for n in nodes}
            Range = RangeFactory(cellmap_temp)
            mode = "node0"
            continue
        elif line == "edges":
            cellmap = {n.address(): n for n in nodes}
            mode = "edges"
            continue
        elif line == "outputs":
            mode = "outputs"
            continue
        elif line == "inputs":
            mode = "inputs"
            continue
        elif line == "named_ranges":
            mode = "named_ranges"
            continue

        if mode == "node0":
            [
                address, formula, python_expression, is_range, is_named_range,
                is_pointer, should_eval
            ] = line.split(SEP)
            formula = clean_bool(formula)
            python_expression = clean_bool(python_expression)
            is_range = to_bool(is_range)
            is_named_range = to_bool(is_named_range)
            is_pointer = to_bool(is_pointer)
            should_eval = should_eval
            mode = "node1"
        elif mode == "node1":
            if is_range:

                reference = json.loads(
                    line
                ) if is_pointer else line  # in order to be able to parse dicts
                vv = Range(reference)

                if is_pointer:
                    if not is_named_range:
                        address = vv.name

                    pointers.add(address)

                cell = Cell(address, None, vv, formula, is_range,
                            is_named_range, should_eval)
                cell.python_expression = python_expression
                nodes.append(cell)
            else:
                value = to_bool(to_float(line))

                cell = Cell(address, None, value, formula, is_range,
                            is_named_range, should_eval)

                cell.python_expression = python_expression
                if formula:
                    if 'OFFSET' in formula or 'INDEX' in formula:
                        pointers.add(address)

                    cell.compile()
                nodes.append(cell)
        elif mode == "edges":
            source, target = line.split(SEP)
            edges.append((cellmap[source], cellmap[target]))
        elif mode == "outputs":
            outputs = line.split(SEP)
        elif mode == "inputs":
            inputs = line.split(SEP)
        elif mode == "named_ranges":
            k, v = line.split(SEP)
            named_ranges[k] = v

    G = DiGraph(data=edges)

    print "Graph loading done, %s nodes, %s edges, %s cellmap entries" % (len(
        G.nodes()), len(G.edges()), len(cellmap))

    return (G, cellmap, named_ranges, pointers, outputs, inputs)
Exemplo n.º 16
0
    def gen_graph(self, outputs=[], inputs=[]):
        """
        Generate the contents of the Spreadsheet from the read cells in the binary files.
        Specifically this function generates the graph.

        :param outputs: can be used to specify the outputs. All not affected cells are removed from the graph.
        :param inputs: can be used to specify the inputs. All not affected cells are removed from the graph.
        """
        # print('___### Generating Graph ###___')

        if len(outputs) == 0:
            preseeds = set(
                list(flatten(self.cellmap.keys())) +
                list(self.named_ranges.keys()))  # to have unicity
        else:
            preseeds = set(outputs)

        preseeds = list(preseeds)  # to be able to modify the list

        seeds = []
        for o in preseeds:
            if o in self.named_ranges:
                reference = self.named_ranges[o]

                if is_range(reference):
                    if 'OFFSET' in reference or 'INDEX' in reference:
                        start_end = prepare_pointer(reference,
                                                    self.named_ranges)
                        rng = self.range(start_end)
                        self.pointers.add(o)
                    else:
                        rng = self.range(reference)

                    for address in rng.addresses:  # this is avoid pruning deletion
                        preseeds.append(address)
                    virtual_cell = Cell(o,
                                        None,
                                        value=rng,
                                        formula=reference,
                                        is_range=True,
                                        is_named_range=True)
                    seeds.append(virtual_cell)
                else:
                    # might need to be changed to actual self.cells Cell, not a copy
                    if 'OFFSET' in reference or 'INDEX' in reference:
                        self.pointers.add(o)

                    value = self.cellmap[
                        reference].value if reference in self.cellmap else None
                    virtual_cell = Cell(o,
                                        None,
                                        value=value,
                                        formula=reference,
                                        is_range=False,
                                        is_named_range=True)
                    seeds.append(virtual_cell)
            else:
                if is_range(o):
                    rng = self.range(o)
                    for address in rng.addresses:  # this is avoid pruning deletion
                        preseeds.append(address)
                    virtual_cell = Cell(o,
                                        None,
                                        value=rng,
                                        formula=o,
                                        is_range=True,
                                        is_named_range=True)
                    seeds.append(virtual_cell)
                else:
                    seeds.append(self.cellmap[o])

        seeds = set(seeds)
        # print("Seeds %s cells" % len(seeds))
        outputs = set(preseeds) if len(outputs) > 0 else [
        ]  # seeds and outputs are the same when you don't specify outputs

        cellmap, G = graph_from_seeds(seeds, self)

        if len(
                inputs
        ) != 0:  # otherwise, we'll set inputs to cellmap inside Spreadsheet
            inputs = list(set(inputs))

            # add inputs that are outside of calculation chain
            for i in inputs:
                if i not in cellmap:
                    if i in self.named_ranges:
                        reference = self.named_ranges[i]
                        if is_range(reference):

                            rng = self.range(reference)
                            for address in rng.addresses:  # this is avoid pruning deletion
                                inputs.append(address)
                            virtual_cell = Cell(i,
                                                None,
                                                value=rng,
                                                formula=reference,
                                                is_range=True,
                                                is_named_range=True)
                            cellmap[i] = virtual_cell
                            G.add_node(
                                virtual_cell
                            )  # edges are not needed here since the input here is not in the calculation chain

                        else:
                            # might need to be changed to actual self.cells Cell, not a copy
                            virtual_cell = Cell(
                                i,
                                None,
                                value=self.cellmap[reference].value,
                                formula=reference,
                                is_range=False,
                                is_named_range=True)
                            cellmap[i] = virtual_cell
                            G.add_node(
                                virtual_cell
                            )  # edges are not needed here since the input here is not in the calculation chain
                    else:
                        if is_range(i):
                            rng = self.range(i)
                            for address in rng.addresses:  # this is avoid pruning deletion
                                inputs.append(address)
                            virtual_cell = Cell(i,
                                                None,
                                                value=rng,
                                                formula=o,
                                                is_range=True,
                                                is_named_range=True)
                            cellmap[i] = virtual_cell
                            G.add_node(
                                virtual_cell
                            )  # edges are not needed here since the input here is not in the calculation chain
                        else:
                            cellmap[i] = self.cellmap[i]
                            G.add_node(
                                self.cellmap[i]
                            )  # edges are not needed here since the input here is not in the calculation chain

            inputs = set(inputs)

        # print("Graph construction done, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap)))

        # undirected = networkx.Graph(G)
        # print "Number of connected components %s", str(number_connected_components(undirected))

        return Spreadsheet(G,
                           cellmap,
                           self.named_ranges,
                           pointers=self.pointers,
                           outputs=outputs,
                           inputs=inputs,
                           debug=self.debug,
                           excel_compiler=False)
Exemplo n.º 17
0
 def cell_from_dict(d):
     return Cell.from_dict(d, cellmap=cellmap)