def __call__(self):
    """Run every thunk in order, freeing each node's no-longer-needed
    storage afterwards; when ``time_thunks`` is set, also record per-node
    call counts and times."""
    if self.time_thunks:
        for cont in self.pre_call_clear:
            cont[0] = None
        try:
            for i, (thunk, node, old_storage) in enumerate(
                zip(self.thunks, self.nodes, self.post_thunk_clear)
            ):
                t0 = time.time()
                thunk()
                t1 = time.time()
                self.call_counts[i] += 1
                self.call_times[i] += t1 - t0
                for old_s in old_storage:
                    old_s[0] = None
        except Exception:
            raise_with_op(self.fgraph, node, thunk)
    else:
        for cont in self.pre_call_clear:
            cont[0] = None
        try:
            for thunk, node, old_storage in zip(
                self.thunks, self.nodes, self.post_thunk_clear
            ):
                thunk()
                for old_s in old_storage:
                    old_s[0] = None
        except Exception:
            raise_with_op(self.fgraph, node, thunk)
def f():
    # Propagate the first set of input values to every other input list,
    # reset the requested storage cells, run the `pre` hook, then run each
    # node's group of thunks through `wrapper`.
    for inputs in input_lists[1:]:
        for input1, input2 in zip(inputs0, inputs):
            input2.storage[0] = copy(input1.storage[0])
    for x in to_reset:
        x[0] = None
    pre(self, [input.data for input in input_lists[0]], order, thunk_groups)
    for i, (thunks, node) in enumerate(zip(thunk_groups, order)):
        try:
            wrapper(self.fgraph, i, node, *thunks)
        except Exception:
            raise_with_op(self.fgraph, node, *thunks)
def __call__(self):
    """Run every thunk in order; when ``time_thunks`` is set, also record
    per-node call counts and times."""
    if self.time_thunks:
        for cont in self.pre_call_clear:
            cont[0] = None
        try:
            for i, (thunk, node) in enumerate(zip(self.thunks, self.nodes)):
                t0 = time.time()
                thunk()
                t1 = time.time()
                self.call_counts[i] += 1
                self.call_times[i] += t1 - t0
        except Exception:
            raise_with_op(self.fgraph, node, thunk)
    else:
        for cont in self.pre_call_clear:
            cont[0] = None
        try:
            for thunk, node in zip(self.thunks, self.nodes):
                thunk()
        except Exception:
            raise_with_op(self.fgraph, node, thunk)
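# Illustrative sketch (not part of the linker above): the two loop-based
# VMs reduce to "storage cells are one-element lists, thunks are closures
# that read and write those cells, and the VM clears some cells and then
# calls the thunks in topological order".  The names below
# (demo_thunk_loop, add_one, double) are made up for this example only.
def demo_thunk_loop():
    x = [None]  # input storage cell
    y = [None]  # intermediate storage cell
    z = [None]  # output storage cell

    def add_one():  # thunk computing y = x + 1
        y[0] = x[0] + 1

    def double():  # thunk computing z = y * 2
        z[0] = y[0] * 2

    thunks = [add_one, double]
    pre_call_clear = [y, z]  # cells reset before every call

    x[0] = 3
    for cell in pre_call_clear:
        cell[0] = None
    for thunk in thunks:
        thunk()
    return z[0]  # 8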
def __call__(self, output_subset=None):
    storage_map = self.storage_map
    compute_map = self.compute_map
    thunks = self.thunks
    dependencies = self.dependencies
    self.node_executed_order = []
    self.node_cleared_order = []

    for cont in self.pre_call_clear:
        cont[0] = None

    for k in self.storage_map:
        compute_map[k][0] = k.owner is None
        if self.callback_input and compute_map[k][0]:
            self.callback_input(k, self.storage_map[k][0])

    # apply_stack contains nodes
    if output_subset is not None:
        first_updated = len(self.outputs) - self.n_updates
        output_subset = output_subset + list(
            range(first_updated, len(self.outputs))
        )
        apply_stack = [
            self.outputs[i].owner for i in output_subset if self.outputs[i].owner
        ]
    else:
        apply_stack = list(self.base_apply_stack)

    last_apply_stack_len = -1

    # This records all function inputs, shared variables and constants
    for var, data in self.storage_map.items():
        if data[0] is None:
            continue
        if hasattr(var.type, "get_shape_info"):
            sh = var.type.get_shape_info(data[0])
        else:
            sh = "no shape"
        self.variable_shape[var] = sh
        st = getattr(data[0], "strides", "no strides")
        if getattr(data[0], "flags", False) and data[0].flags.c_contiguous:
            st = "c"
        elif hasattr(data[0], "is_c_contiguous") and data[0].is_c_contiguous():
            st = "c"
        self.variable_strides[var] = st
        off = getattr(data[0], "offset", "")
        self.variable_offset[var] = off

    while apply_stack:
        # Make sure something happened last time round.  This is
        # just a safety check to make sure the op is written
        # correctly; apply_stack should either decrease in length
        # by one (a thunk successfully applied), or increase in
        # length (added dependencies over and above the original).
        # NB: this doesn't catch cycles (would be too expensive/slow),
        # just stalls.
        apply_stack_len = len(apply_stack)
        assert apply_stack_len != last_apply_stack_len
        last_apply_stack_len = apply_stack_len

        current_apply = apply_stack.pop()
        current_inputs = current_apply.inputs
        current_outputs = current_apply.outputs
        current_deps = current_inputs + current_apply.destroy_dependencies

        computed_ins = all(compute_map[v][0] for v in current_deps)
        computed_outs = all(compute_map[v][0] for v in current_outputs)

        if not thunks[self.node_idx[current_apply]].lazy:
            #
            # stack loop: Normal Non-Lazy Case
            # ================================
            #
            # Check if all inputs are in place.
            # If so, compute the thunk and remove the node from the
            # apply_stack.  If not, leave it in, and add to the
            # apply_stack the nodes that will produce those inputs.
            if computed_ins and not computed_outs:
                # -- Non-lazy case: have inputs, time to compute outputs
                try:
                    _, dt = self.run_thunk_of_node(current_apply)
                    del _
                    if config.profile or config.print_global_stats:
                        current_idx = self.node_idx[current_apply]
                        self.call_counts[current_idx] += 1
                        self.call_times[current_idx] += dt
                        # Computing the memory footprint of the op.
                        # ?? What about inplace: if the op is inplace
                        # you don't actually ask for more memory!
                        for idx, o in enumerate(
                            thunks[self.node_idx[current_apply]].outputs
                        ):
                            var = self.nodes[current_idx].outputs[idx]
                            if hasattr(var.type, "get_shape_info"):
                                sh = var.type.get_shape_info(o[0])
                            else:
                                sh = "no shape"
                            self.variable_shape[var] = sh
                            st = getattr(o[0], "strides", "no strides")
                            if (
                                getattr(o[0], "flags", False)
                                and o[0].flags.c_contiguous
                            ):
                                st = "c"
                            elif (
                                hasattr(o[0], "is_c_contiguous")
                                and o[0].is_c_contiguous()
                            ):
                                st = "c"
                            self.variable_strides[var] = st
                            off = getattr(o[0], "offset", "")
                            self.variable_offset[var] = off
                except Exception:
                    raise_with_op(
                        self.fgraph,
                        current_apply,
                        self.thunks[self.node_idx[current_apply]],
                        storage_map=storage_map,
                    )
                for o in current_apply.outputs:
                    compute_map[o][0] = 1

                input_index = []  # indices of the input variables that were freed
                if self.allow_gc:
                    for i in current_apply.inputs:
                        # Garbage Collection -> check if anybody else uses
                        # this input
                        if dependencies[i] and i.owner and i not in self.outputs:
                            if all(compute_map[v][0] for v in dependencies[i]):
                                storage_map[i][0] = None
                                input_index.append(current_apply.inputs.index(i))

                                # DO NOT set compute_map to 0.
                                # If the value becomes False while
                                # current_apply is still in the stack,
                                # the node will be recomputed, which can
                                # give wrong values with some combinations
                                # of inplace ops.
                                compute_map[i][0] = 2

                                if (
                                    config.warn__vm_gc_bug
                                    and current_apply in apply_stack
                                    and getattr(
                                        current_apply.op, "destroy_map", False
                                    )
                                ):
                                    warnings.warn(
                                        "There was a bug that existed in "
                                        "the default Theano configuration,"
                                        " only in the development version "
                                        "between July 5th 2012 and "
                                        "July 30th 2012. This was not in "
                                        "a released version. The bug was "
                                        "affecting this script.",
                                        # The stack level is not good when
                                        # inside a Scan.
                                        stacklevel=3,
                                    )
                self.node_cleared_order.append(input_index)

            elif not computed_ins:
                # -- Non-lazy case, need inputs
                apply_stack.append(current_apply)
                apply_stack.extend(inp.owner for inp in current_deps if inp.owner)

        elif not computed_outs:
            #
            # stack loop: Lazy Evaluation Case
            # ================================
            #
            # Lazy evaluation protocol is to run the thunk with the
            # current storage_map and compute_map accessed via closure;
            # the thunk returns a list of variables from its input
            # list that it requires.
            try:
                requires, dt = self.run_thunk_of_node(current_apply)
                current_idx = self.node_idx[current_apply]
                self.call_counts[current_idx] += 1
                self.call_times[current_idx] += dt
            except Exception:
                raise_with_op(
                    self.fgraph,
                    current_apply,
                    self.thunks[self.node_idx[current_apply]],
                    storage_map=storage_map,
                )

            if requires:
                for r in requires:
                    # We are not done with this op: push it back on the
                    # stack and schedule the producers of the inputs we
                    # are missing.
                    apply_stack.append(current_apply)
                    if current_apply.inputs[r].owner:
                        apply_stack.append(current_apply.inputs[r].owner)
            else:
                if config.profile or config.print_global_stats:
                    for idx, o in enumerate(
                        thunks[self.node_idx[current_apply]].outputs
                    ):
                        var = self.nodes[self.node_idx[current_apply]].outputs[idx]
                        if hasattr(var.type, "get_shape_info"):
                            sh = var.type.get_shape_info(o[0])
                        else:
                            sh = "no shape"
                        self.variable_shape[var] = sh
                        st = getattr(o[0], "strides", "no strides")
                        if (
                            getattr(o[0], "flags", False)
                            and o[0].flags.c_contiguous
                        ):
                            st = "c"
                        elif (
                            hasattr(o[0], "is_c_contiguous")
                            and o[0].is_c_contiguous()
                        ):
                            st = "c"
                        self.variable_strides[var] = st
                        off = getattr(o[0], "offset", "")
                        self.variable_offset[var] = off

                input_index = []
                if self.allow_gc:
                    for i in current_apply.inputs:
                        if dependencies[i] and i.owner and i not in self.outputs:
                            empty_storage_map = True
                            for x in dependencies[i]:
                                if not compute_map[x][0]:
                                    empty_storage_map = False
                                    break
                            if empty_storage_map:
                                storage_map[i][0] = None
                                input_index.append(current_apply.inputs.index(i))
                                # See the non-lazy gc code for an explanation
                                # of the compute_map change.
                                compute_map[i][0] = 2
                self.node_cleared_order.append(input_index)

    # Hacky coarse gc final pass.
    # This is required until we have a proper gc algorithm for graphs with
    # lazy evaluation.  See discussion on theano-dev June 19 2012.
    final_index = []
    if self.allow_gc:
        for v in storage_map:
            if v.owner and v not in self.outputs:
                if compute_map[v][0] == 2:
                    continue
                else:
                    storage_map[v][0] = None
                    final_index.append(v)
                    compute_map[v][0] = 2
    self.node_cleared_order.append(final_index)
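# Illustrative sketch (not part of the linker above): the lazy-evaluation
# protocol used by the stack loop.  A lazy thunk returns the indices of the
# inputs it still needs; the driver pushes the node back on the stack
# together with the producers of those inputs, so only the required branch
# of the graph is ever computed.  Node, demo_lazy_stack and if_else_thunk
# are made-up names for this example only.
class Node:
    def __init__(self, inputs, thunk):
        self.inputs = inputs  # storage cells this node reads
        self.thunk = thunk    # returns [] when done, else missing input indices


def demo_lazy_stack():
    cond = [True]
    a = [None]    # produced by node_a
    b = [None]    # produced by node_b (never computed in this run)
    out = [None]

    def compute_a(node):
        a[0] = 1
        return []

    def compute_b(node):
        b[0] = 2
        return []

    node_a = Node([], compute_a)
    node_b = Node([], compute_b)
    owners = {id(a): node_a, id(b): node_b}

    def if_else_thunk(node):
        # Lazy "if": first ask for the branch selected by cond,
        # then produce the output once that branch is available.
        branch = a if cond[0] else b
        if branch[0] is None:
            return [next(i for i, inp in enumerate(node.inputs) if inp is branch)]
        out[0] = branch[0]
        return []

    ifelse = Node([a, b], if_else_thunk)

    apply_stack = [ifelse]
    while apply_stack:
        current = apply_stack.pop()
        requires = current.thunk(current)
        for r in requires:
            apply_stack.append(current)
            apply_stack.append(owners[id(current.inputs[r])])
    return out[0]  # 1; compute_b never ran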