def clone(self):
    """ Return a copy of this loop built from copied operations.

    Every operation of the prefix, the prefix label (if any), the loop
    body and the jump is copied with copy_resop and passed through one
    shared Renamer, so that arguments referring to results of already
    copied operations point at the copies.

    The label is copied as well, but (as before) it is not renamed.
    """
    renamer = Renamer()
    # copied exactly once; this copy is handed to the new loop below
    label = copy_resop(self.label)
    prefix = []
    for op in self.prefix:
        newop = copy_resop(op)
        renamer.rename(newop)
        if not newop.returns_void():
            # later uses of the original result must see the copy
            renamer.start_renaming(op, newop)
        prefix.append(newop)
    prefix_label = None
    if self.prefix_label:
        prefix_label = copy_resop(self.prefix_label)
        renamer.rename(prefix_label)
    oplist = []
    for op in self.operations:
        newop = copy_resop(op)
        renamer.rename(newop)
        if not newop.returns_void():
            renamer.start_renaming(op, newop)
        oplist.append(newop)
    jump = copy_resop(self.jump)
    renamer.rename(jump)
    loop = VectorLoop(label, oplist, jump)
    loop.prefix = prefix
    loop.prefix_label = prefix_label
    return loop
class SchedulerState(object):
    """ State that is carried along while the operations of a dependency
        graph are scheduled. Subclasses fill in the hooks (emit, delay,
        ensure_args_unpacked, pre_emit, post_emit).
    """

    def __init__(self, graph):
        self.graph = graph
        self.renamer = Renamer()
        # operations emitted so far; becomes loop.operations
        self.oplist = []
        # nodes that have no unresolved dependency left
        self.worklist = []
        # becomes loop.prefix in post_schedule
        self.invariant_oplist = []
        # appended to the arguments of the prefix label and of the jump
        self.invariant_vector_vars = []
        self.seen = {}

    def _append_invariant_args(self, op):
        """ Copy `op` with the invariant vector variables appended to its
            argument list and rename the copy. """
        extended = op.getarglist_copy() + self.invariant_vector_vars
        changed = op.copy_and_change(op.getopnum(), extended)
        self.renamer.rename(changed)
        return changed

    def post_schedule(self):
        """ Write the scheduling result back into the loop of the graph. """
        loop = self.graph.loop
        self.renamer.rename(loop.jump)
        self.ensure_args_unpacked(loop.jump)
        loop.operations = self.oplist
        loop.prefix = self.invariant_oplist
        if len(self.invariant_vector_vars) == 0 and len(self.invariant_oplist) == 0:
            # nothing was hoisted, label and jump stay as they are
            return
        # the label first, then the jump
        loop.prefix_label = self._append_invariant_args(loop.label)
        loop.jump = self._append_invariant_args(loop.jump)

    def profitable(self):
        return True

    def prepare(self):
        """ Seed the worklist with every node that depends on nothing. """
        worklist = self.worklist
        for node in self.graph.nodes:
            if node.depends_count() != 0:
                continue
            worklist.insert(0, node)

    def emit(self, node, scheduler):
        # implement me in subclass. e.g. as in VecScheduleState
        return False

    def delay(self, node):
        return False

    def has_more(self):
        return len(self.worklist) > 0

    def ensure_args_unpacked(self, op):
        pass

    def post_emit(self, node):
        pass

    def pre_emit(self, node):
        pass
class SchedulerState(object):
    """ State carried along while the nodes of a dependency graph are
        scheduled. In addition to emitting operations in order, nodes that
        are pure can be delayed: they are only appended to the operation
        list once another emitted operation (or the final jump) needs them.
    """

    def __init__(self, cpu, graph):
        self.cpu = cpu
        self.renamer = Renamer()
        self.graph = graph
        # operations emitted so far, assigned to loop.operations in post_schedule
        self.oplist = []
        # nodes that are ready to be scheduled (see prepare/mark_emitted)
        self.worklist = []
        # NOTE(review): the two invariant_* lists are not used by any method
        # visible in this class -- presumably used by subclasses, confirm
        self.invariant_oplist = []
        self.invariant_vector_vars = []
        # dict used as a set: operations that have been appended to oplist
        # (or replaced by an already emitted one)
        self.seen = {}
        # delayed nodes that nobody consumes; resolved against the jump
        # in post_schedule
        self.delayed = []

    def resolve_delayed(self, needs_resolving, delayed, op):
        """ Emit the nodes in `delayed` that `op` needs, recursively.

            needs_resolving: dict used as a set of boxes that still have to
                be produced; it is filled and emptied in place
            delayed: list of delayed nodes; resolved nodes are removed from
                it in place
            op: the operation whose arguments (and fail arguments, if it is
                a guard) must be available
        """
        # recursive solving of all delayed objects
        if not delayed:
            return
        args = op.getarglist()
        if op.is_guard():
            # copy before extending, the fail args are needed as well
            args = args[:] + op.getfailargs()
        for arg in args:
            if arg is None or arg.is_constant() or arg.is_inputarg():
                continue
            if arg not in self.seen:
                box = self.renamer.rename_box(arg)
                needs_resolving[box] = None
        indexvars = self.graph.index_vars
        # walk the delayed list from the back to the front
        i = len(delayed) - 1
        while i >= 0:
            node = delayed[i]
            op = node.getoperation()
            if op in needs_resolving:
                # either it is a normal operation, or we know that there is a linear combination
                del needs_resolving[op]
                if op in indexvars:
                    opindexvar = indexvars[op]
                    # there might be a variable already, that
                    # calculated the index variable, thus just reuse it
                    for var, indexvar in indexvars.items():
                        if indexvar == opindexvar and var in self.seen:
                            self.renamer.start_renaming(op, var)
                            break
                    else:
                        # no emitted variable carries this index variable yet
                        if opindexvar.calculated_by(op):
                            # just append this operation
                            self.seen[op] = None
                            self.append_to_oplist(op)
                        else:
                            # here is an easier way to calculate just this operation
                            last = op
                            for operation in opindexvar.get_operations():
                                self.append_to_oplist(operation)
                                last = operation
                            # the last emitted operation now stands for op
                            indexvars[last] = opindexvar
                            self.renamer.start_renaming(op, last)
                            self.seen[op] = None
                            self.seen[last] = None
                else:
                    # first resolve what op itself needs, then emit op
                    self.resolve_delayed(needs_resolving, delayed, op)
                    self.append_to_oplist(op)
                    self.seen[op] = None
                # the recursive call may already have shortened the list
                if len(delayed) > i:
                    del delayed[i]
            i -= 1
            # some times the recursive call can remove several items from delayed,
            # thus we correct the index here
            if len(delayed) <= i:
                i = len(delayed) - 1

    def append_to_oplist(self, op):
        """ Rename `op` with the current renamer and append it to oplist. """
        self.renamer.rename(op)
        self.oplist.append(op)

    def schedule(self):
        """ Run the scheduler: prepare the worklist, let a Scheduler walk
            and emit the nodes, then finish with post_schedule. """
        self.prepare()
        Scheduler().walk_and_emit(self)
        self.post_schedule()

    def post_schedule(self):
        """ Resolve what is still delayed against the jump, rename the jump
            and store the emitted operations in the loop. """
        loop = self.graph.loop
        jump = loop.jump
        if self.delayed:
            # some operations can be delayed until the jump instruction,
            # handle them here
            self.resolve_delayed({}, self.delayed, jump)
        self.renamer.rename(jump)
        loop.operations = self.oplist

    def profitable(self):
        return True

    def prepare(self):
        """ Put every node without dependencies at the front of the worklist. """
        for node in self.graph.nodes:
            if node.depends_count() == 0:
                self.worklist.insert(0, node)

    def try_emit_or_delay(self, node):
        """ Delay `node` if it is pure and not imaginary, otherwise emit it
            right away (after handling the nodes delayed for it). """
        if not node.is_imaginary() and node.is_pure():
            # this operation might never be emitted. only if it is really needed
            self.delay_emit(node)
            return
        # emit a now!
        self.pre_emit(node, True)
        self.mark_emitted(node)
        if not node.is_imaginary():
            op = node.getoperation()
            self.seen[op] = None
            self.append_to_oplist(op)

    def delay_emit(self, node):
        """ it has been decided that the operation might be scheduled later """
        # the node carries along everything that was delayed for it
        delayed = node.delayed or []
        if node not in delayed:
            delayed.append(node)
        node.delayed = None
        provides = node.provides()
        if len(provides) == 0:
            # nobody consumes this node: keep it for the final jump
            for n in delayed:
                self.delayed.append(n)
        else:
            # hand a copy of the delayed list to every consumer
            for to in node.provides():
                tnode = to.target_node()
                self.delegate_delay(tnode, delayed[:])
        # marked as emitted although it has not been appended to oplist
        self.mark_emitted(node)

    def delegate_delay(self, node, delayed):
        """ Chain up delays, this can reduce many more of the operations """
        if node.delayed is None:
            node.delayed = delayed
        else:
            # merge without duplicating entries
            delayedlist = node.delayed
            for d in delayed:
                if d not in delayedlist:
                    delayedlist.append(d)

    def mark_emitted(state, node, unpack=True):
        """ An operation has been emitted, adds new operations to the worklist
            whenever their dependency count drops to zero.
            Keeps worklist sorted (see priority)

            Note that the first parameter is named `state` instead of `self`.
            If `unpack` is true, ensure_args_unpacked is called for the
            operation of a non imaginary node.
        """
        worklist = state.worklist
        # iterate a copy: the edges are removed while iterating
        provides = node.provides()[:]
        for dep in provides: # COPY
            target = dep.to
            node.remove_edge_to(target)
            if not target.emitted and target.depends_count() == 0:
                # sorts them by priority
                # (scans from the end of the worklist towards the front)
                i = len(worklist) - 1
                while i >= 0:
                    cur = worklist[i]
                    c = (cur.priority - target.priority)
                    if c < 0: # meaning itnode.priority < target.priority:
                        worklist.insert(i + 1, target)
                        break
                    elif c == 0:
                        # if they have the same priority, sort them
                        # using the original position in the trace
                        if target.getindex() < cur.getindex():
                            worklist.insert(i + 1, target)
                            break
                    i -= 1
                else:
                    # no suitable position found, it becomes the first entry
                    worklist.insert(0, target)
        node.clear_dependencies()
        node.emitted = True
        if not node.is_imaginary():
            op = node.getoperation()
            state.renamer.rename(op)
            if unpack:
                state.ensure_args_unpacked(op)
            state.post_emit(node)

    def delay(self, node):
        return False

    def has_more(self):
        """ True as long as the worklist is not empty. """
        return len(self.worklist) > 0

    def ensure_args_unpacked(self, op):
        # hook, does nothing here
        pass

    def post_emit(self, node):
        # hook, does nothing here
        pass

    def pre_emit(self, orignode, pack_first=True):
        """ Handle the nodes that were delayed for `orignode` before it is
            emitted. If `pack_first` is true, the delayed nodes needed by the
            operation of `orignode` are resolved first; whatever has not been
            seen afterwards is passed on (to the final delay list or to the
            consumers of the delayed node).
        """
        delayed = orignode.delayed
        if delayed:
            # there are some nodes that have been delayed just for this operation
            if pack_first:
                op = orignode.getoperation()
                self.resolve_delayed({}, delayed, op)
            for node in delayed:
                op = node.getoperation()
                if op in self.seen:
                    continue
                # NOTE(review): node was already dereferenced above, so this
                # check can never be false here
                if node is not None:
                    provides = node.provides()
                    if len(provides) == 0:
                        # add this node to the final delay list
                        # might be emitted before jump!
                        self.delayed.append(node)
                    else:
                        for to in node.provides():
                            tnode = to.target_node()
                            self.delegate_delay(tnode, [node])
            orignode.delayed = None
def unroll_loop_iterations(self, loop, unroll_count, align_unroll_once=False):
    """ Unroll the loop `unroll_count` times. There can be an additional unroll step
        if alignment might benefit

        loop: the loop to unroll; its label, jump and operations are
            modified in place
        unroll_count: number of copies of the loop body that are created
        align_unroll_once: if true, one more copy is created. The new label
            is built after the first copy, the original operations are
            stored in loop.align_operations and loop.operations only
            contains the copies. Otherwise loop.operations is the original
            body followed by the copies.
    """
    renamer = Renamer()
    operations = loop.operations
    orig_jump_args = loop.jump.getarglist()[:]
    # operations that must not show up a second time in the unrolled body
    prohibit_opnums = (rop.GUARD_FUTURE_CONDITION,
                       rop.GUARD_NOT_INVALIDATED,
                       rop.DEBUG_MERGE_POINT)
    unrolled = []

    if align_unroll_once:
        unroll_count += 1

    # it is assumed that #label_args == #jump_args
    label_arg_count = len(orig_jump_args)
    label = loop.label
    jump = loop.jump
    new_label = loop.label
    for u in range(unroll_count):
        # fill the map with the renaming boxes. keys are boxes from the label
        for i in range(label_arg_count):
            la = label.getarg(i)
            ja = jump.getarg(i)
            ja = renamer.rename_box(ja)
            if la != ja:
                renamer.start_renaming(la, ja)
        #
        for op in operations:
            if op.getopnum() in prohibit_opnums:
                continue # do not unroll this operation twice
            copied_op = copy_resop(op)
            if not copied_op.returns_void():
                # every result assigns a new box, thus creates an entry
                # to the rename map.
                renamer.start_renaming(op, copied_op)
            #
            args = copied_op.getarglist()
            for a, arg in enumerate(args):
                value = renamer.rename_box(arg)
                copied_op.setarg(a, value)
            # not only the arguments, but also the fail args need
            # to be adjusted. rd_snapshot stores the live variables
            # that are needed to resume.
            if copied_op.is_guard():
                self.copy_guard_descr(renamer, copied_op)
            #
            unrolled.append(copied_op)
        #
        if align_unroll_once and u == 0:
            # the label of the aligned loop sits after the first copy
            descr = label.getdescr()
            args = label.getarglist()[:]
            new_label = ResOperation(rop.LABEL, args, descr)
            renamer.rename(new_label)
    #
    # the jump arguments have been changed
    # if label(iX) ... jump(i(X+1)) is called, at the next unrolled loop
    # must look like this: label(i(X+1)) ... jump(i(X+2))
    args = loop.jump.getarglist()
    for i, arg in enumerate(args):
        value = renamer.rename_box(arg)
        loop.jump.setarg(i, value)
    #
    loop.label = new_label
    if align_unroll_once:
        loop.align_operations = operations
        loop.operations = unrolled
    else:
        loop.operations = operations + unrolled
class GuardStrengthenOpt(object):
    """ Removes guards that are implied by other guards of the same loop
        and replaces weaker guards by stronger ones.

        Note that this optimization is only used in the vector optimizer (yet)
    """

    def __init__(self, index_vars):
        # maps an operation to its index variable (see eliminate_guards)
        self.index_vars = index_vars
        self._newoperations = []
        self.strength_reduced = 0 # how many guards could be removed?
        # key -> list of guards that are not implied by each other
        self.strongest_guards = {}
        # position of a guard in the trace -> stronger guard to emit at
        # that position, or None if nothing should be emitted there
        self.guards = {}
        self.delayed = {}

    def collect_guard_information(self, loop):
        """ Record every GUARD_TRUE/GUARD_FALSE of the loop for which a
            Guard object can be built, under its left and its right key. """
        operations = loop.operations
        for i, op in enumerate(operations):
            if not op.is_guard():
                continue
            if op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
                guard = Guard.of(op.getarg(0), operations, i, self.index_vars)
                if guard is None:
                    continue
                self.record_guard(guard.getleftkey(), guard)
                self.record_guard(guard.getrightkey(), guard)

    def record_guard(self, key, guard):
        """ Remember `guard` for `key`. Nothing happens if key is None. """
        if key is None:
            return
        # the operations are processed from 1..n (forward),
        # (1) if no guard is present for the key, the guard is saved
        # (2) guard(s) with this key is/are already present,
        # thus each of them is seen as a possible candidate to strengthen
        # or imply the current. in both cases the current guard is
        # not emitted at its own position; when it is the stronger one it
        # is emitted at the position of the guard it replaces.
        # if it is unrelated to all of them, it is saved as well
        others = self.strongest_guards.setdefault(key, [])
        replaced = False
        for i, other in enumerate(others):
            assert guard is not other
            if guard.implies(other, self):
                # strengthened
                others[i] = guard
                self.guards[guard.index] = None # mark as 'do not emit'
                guard.inhert_attributes(other)
                self.guards[other.index] = guard
                replaced = True
            elif other.implies(guard, self):
                # implied
                self.guards[guard.index] = None # mark as 'do not emit'
                replaced = True
        if not replaced:
            # covers (1) as well: the loop above did not run at all
            others.append(guard)

    def eliminate_guards(self, loop):
        """ Rebuild loop.operations without the guards marked as 'do not
            emit' and with the stronger guards in place of weaker ones.
            A fresh renamer is installed as self.renamer. """
        self.renamer = Renamer()
        for i, op in enumerate(loop.operations):
            if op.is_guard():
                if i in self.guards:
                    # either a stronger guard has been saved
                    # or it should not be emitted (= None)
                    guard = self.guards[i]
                    self.strength_reduced += 1
                    if guard is not None:
                        guard.emit_operations(self)
                else:
                    self.emit_operation(op)
                continue
            if not op.returns_void():
                index_var = self.index_vars.get(op, None)
                if index_var and not index_var.is_identity():
                    # let the index variable emit its own operations and
                    # use the result instead of op
                    var = index_var.emit_operations(self, op)
                    self.renamer.start_renaming(op, var)
                    continue
            self.emit_operation(op)
        self.renamer.rename(loop.jump)
        #
        loop.operations = self._newoperations[:]

    def propagate_all_forward(self, info, loop, user_code=False):
        """ strengthens the guards that protect an integral value

            If `user_code` is true, eliminate_array_bound_checks runs as well.
        """
        # the guards are ordered. guards[i] is before guards[j] iff i < j
        self.collect_guard_information(loop)
        self.eliminate_guards(loop)
        #
        assert len(info.versions) == 1
        version = info.versions[0]

        for op in loop.operations:
            if not op.is_guard():
                continue
            descr = op.getdescr()
            if descr and descr.loop_version():
                assert isinstance(descr, AbstractFailDescr)
                info.track(op, descr, version)

        if user_code:
            self.eliminate_array_bound_checks(info, loop)

    def emit_operation(self, op):
        """ Rename `op` and append it to the new operation list. Needs
            self.renamer, which is created by eliminate_guards. """
        self.renamer.rename(op)
        self._newoperations.append(op)

    def operation_position(self):
        """ Position the next emitted operation will get. """
        return len(self._newoperations)

    def eliminate_array_bound_checks(self, info, loop):
        """ For keys with more than one recorded guard, try to imply the
            guards 2..n by the first one with the help of a transitive
            guard. Operations emitted on the way are put in front of
            loop.prefix. """
        info.mark()
        version = None
        self._newoperations = []
        for guards in self.strongest_guards.values():
            if len(guards) <= 1:
                continue
            # there is more than one guard for that key,
            # that is why we could imply the guards 2..n
            # iff we add invariant guards
            one = guards[0]
            for other in guards[1:]:
                transitive_guard = one.transitive_imply(other, self, loop)
                if transitive_guard:
                    if version is None:
                        # the snapshot is taken only once, and only if needed
                        version = info.snapshot(loop)
                    info.remove(other.op.getdescr())
                    other.set_to_none(info, loop)
                    descr = transitive_guard.getdescr()
                    assert isinstance(descr, AbstractFailDescr)
                    info.track(transitive_guard, descr, version)
        info.clear()

        loop.prefix = self._newoperations + loop.prefix
        # drop the falsy entries (presumably left behind by set_to_none)
        loop.operations = [op for op in loop.operations if op]
class GuardStrengthenOpt(object):
    """ Removes guards that are implied by other guards of the same loop
        and replaces weaker guards by stronger ones.

        Note that this optimization is only used in the vector optimizer (yet)
    """

    def __init__(self, index_vars):
        # maps an operation to its index variable (see eliminate_guards)
        self.index_vars = index_vars
        self._newoperations = []
        self.strength_reduced = 0 # how many guards could be removed?
        # key -> list of guards that are not implied by each other
        self.strongest_guards = {}
        # position of a guard in the trace -> stronger guard to emit at
        # that position, or None if nothing should be emitted there
        self.guards = {}
        self.delayed = {}

    def collect_guard_information(self, loop):
        """ Record every GUARD_TRUE/GUARD_FALSE of the loop for which a
            Guard object can be built, under its left and its right key. """
        operations = loop.operations
        for i, op in enumerate(operations):
            if not op.is_guard():
                continue
            if op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
                guard = Guard.of(op.getarg(0), operations, i, self.index_vars)
                if guard is None:
                    continue
                self.record_guard(guard.getleftkey(), guard)
                self.record_guard(guard.getrightkey(), guard)

    def record_guard(self, key, guard):
        """ Remember `guard` for `key`. Nothing happens if key is None. """
        if key is None:
            return
        # the operations are processed from 1..n (forward),
        # (1) if no guard is present for the key, the guard is saved
        # (2) guard(s) with this key is/are already present,
        # thus each of them is seen as a possible candidate to strengthen
        # or imply the current. in both cases the current guard is
        # not emitted at its own position; when it is the stronger one it
        # is emitted at the position of the guard it replaces.
        # if it is unrelated to all of them, it is saved as well
        others = self.strongest_guards.setdefault(key, [])
        replaced = False
        for i, other in enumerate(others):
            assert guard is not other
            if guard.implies(other, self):
                # strengthened
                others[i] = guard
                self.guards[guard.index] = None # mark as 'do not emit'
                guard.inhert_attributes(other)
                self.guards[other.index] = guard
                replaced = True
            elif other.implies(guard, self):
                # implied
                self.guards[guard.index] = None # mark as 'do not emit'
                replaced = True
        if not replaced:
            # covers (1) as well: the loop above did not run at all
            others.append(guard)

    def eliminate_guards(self, loop):
        """ Rebuild loop.operations without the guards marked as 'do not
            emit' and with the stronger guards in place of weaker ones.
            A fresh renamer is installed as self.renamer. """
        self.renamer = Renamer()
        for i, op in enumerate(loop.operations):
            if op.is_guard():
                if i in self.guards:
                    # either a stronger guard has been saved
                    # or it should not be emitted (= None)
                    guard = self.guards[i]
                    self.strength_reduced += 1
                    if guard is not None:
                        guard.emit_operations(self)
                else:
                    self.emit_operation(op)
                continue
            if not op.returns_void():
                index_var = self.index_vars.get(op, None)
                if index_var and not index_var.is_identity():
                    # let the index variable emit its own operations and
                    # use the result instead of op
                    var = index_var.emit_operations(self, op)
                    self.renamer.start_renaming(op, var)
                    continue
            self.emit_operation(op)
        self.renamer.rename(loop.jump)
        #
        loop.operations = self._newoperations[:]

    def propagate_all_forward(self, info, loop, user_code=False):
        """ strengthens the guards that protect an integral value

            If `user_code` is true, eliminate_array_bound_checks runs as well.
        """
        # the guards are ordered. guards[i] is before guards[j] iff i < j
        self.collect_guard_information(loop)
        self.eliminate_guards(loop)
        #
        assert len(info.versions) == 1
        version = info.versions[0]

        for op in loop.operations:
            if not op.is_guard():
                continue
            descr = op.getdescr()
            if descr and descr.loop_version():
                assert isinstance(descr, AbstractFailDescr)
                info.track(op, descr, version)

        if user_code:
            self.eliminate_array_bound_checks(info, loop)

    def emit_operation(self, op):
        """ Rename `op` and append it to the new operation list. Needs
            self.renamer, which is created by eliminate_guards. """
        self.renamer.rename(op)
        self._newoperations.append(op)

    def operation_position(self):
        """ Position the next emitted operation will get. """
        return len(self._newoperations)

    def eliminate_array_bound_checks(self, info, loop):
        """ For keys with more than one recorded guard, try to imply the
            guards 2..n by the first one with the help of a transitive
            guard. Operations emitted on the way are put in front of
            loop.prefix. """
        info.mark()
        version = None
        self._newoperations = []
        for guards in self.strongest_guards.values():
            if len(guards) <= 1:
                continue
            # there is more than one guard for that key,
            # that is why we could imply the guards 2..n
            # iff we add invariant guards
            one = guards[0]
            for other in guards[1:]:
                transitive_guard = one.transitive_imply(other, self, loop)
                if transitive_guard:
                    if version is None:
                        # the snapshot is taken only once, and only if needed
                        version = info.snapshot(loop)
                    info.remove(other.op.getdescr())
                    other.set_to_none(info, loop)
                    descr = transitive_guard.getdescr()
                    assert isinstance(descr, AbstractFailDescr)
                    info.track(transitive_guard, descr, version)
        info.clear()

        loop.prefix = self._newoperations + loop.prefix
        # drop the falsy entries (presumably left behind by set_to_none)
        loop.operations = [op for op in loop.operations if op]
class SchedulerState(object):
    """ State carried along while the nodes of a dependency graph are
        scheduled. In addition to emitting operations in order, nodes that
        are pure can be delayed: they are only appended to the operation
        list once another emitted operation (or the final jump) needs them.
    """

    def __init__(self, cpu, graph):
        self.cpu = cpu
        self.renamer = Renamer()
        self.graph = graph
        # operations emitted so far, assigned to loop.operations in post_schedule
        self.oplist = []
        # nodes that are ready to be scheduled (see prepare/mark_emitted)
        self.worklist = []
        # NOTE(review): the two invariant_* lists are not used by any method
        # visible in this class -- presumably used by subclasses, confirm
        self.invariant_oplist = []
        self.invariant_vector_vars = []
        # dict used as a set: operations that have been appended to oplist
        # (or replaced by an already emitted one)
        self.seen = {}
        # delayed nodes that nobody consumes; resolved against the jump
        # in post_schedule
        self.delayed = []

    def resolve_delayed(self, needs_resolving, delayed, op):
        """ Emit the nodes in `delayed` that `op` needs, recursively.

            needs_resolving: dict used as a set of boxes that still have to
                be produced; it is filled and emptied in place
            delayed: list of delayed nodes; resolved nodes are removed from
                it in place
            op: the operation whose arguments (and fail arguments, if it is
                a guard) must be available
        """
        # recursive solving of all delayed objects
        if not delayed:
            return
        args = op.getarglist()
        if op.is_guard():
            # copy before extending, the fail args are needed as well
            args = args[:] + op.getfailargs()
        for arg in args:
            if arg is None or arg.is_constant() or arg.is_inputarg():
                continue
            if arg not in self.seen:
                box = self.renamer.rename_box(arg)
                needs_resolving[box] = None
        indexvars = self.graph.index_vars
        # walk the delayed list from the back to the front
        i = len(delayed)-1
        while i >= 0:
            node = delayed[i]
            op = node.getoperation()
            if op in needs_resolving:
                # either it is a normal operation, or we know that there is a linear combination
                del needs_resolving[op]
                if op in indexvars:
                    opindexvar = indexvars[op]
                    # there might be a variable already, that
                    # calculated the index variable, thus just reuse it
                    for var, indexvar in indexvars.items():
                        if indexvar == opindexvar and var in self.seen:
                            self.renamer.start_renaming(op, var)
                            break
                    else:
                        # no emitted variable carries this index variable yet
                        if opindexvar.calculated_by(op):
                            # just append this operation
                            self.seen[op] = None
                            self.append_to_oplist(op)
                        else:
                            # here is an easier way to calculate just this operation
                            last = op
                            for operation in opindexvar.get_operations():
                                self.append_to_oplist(operation)
                                last = operation
                            # the last emitted operation now stands for op
                            indexvars[last] = opindexvar
                            self.renamer.start_renaming(op, last)
                            self.seen[op] = None
                            self.seen[last] = None
                else:
                    # first resolve what op itself needs, then emit op
                    self.resolve_delayed(needs_resolving, delayed, op)
                    self.append_to_oplist(op)
                    self.seen[op] = None
                # the recursive call may already have shortened the list
                if len(delayed) > i:
                    del delayed[i]
            i -= 1
            # some times the recursive call can remove several items from delayed,
            # thus we correct the index here
            if len(delayed) <= i:
                i = len(delayed)-1

    def append_to_oplist(self, op):
        """ Rename `op` with the current renamer and append it to oplist. """
        self.renamer.rename(op)
        self.oplist.append(op)

    def schedule(self):
        """ Run the scheduler: prepare the worklist, let a Scheduler walk
            and emit the nodes, then finish with post_schedule. """
        self.prepare()
        Scheduler().walk_and_emit(self)
        self.post_schedule()

    def post_schedule(self):
        """ Resolve what is still delayed against the jump, rename the jump
            and store the emitted operations in the loop. """
        loop = self.graph.loop
        jump = loop.jump
        if self.delayed:
            # some operations can be delayed until the jump instruction,
            # handle them here
            self.resolve_delayed({}, self.delayed, jump)
        self.renamer.rename(jump)
        loop.operations = self.oplist

    def profitable(self):
        return True

    def prepare(self):
        """ Put every node without dependencies at the front of the worklist. """
        for node in self.graph.nodes:
            if node.depends_count() == 0:
                self.worklist.insert(0, node)

    def try_emit_or_delay(self, node):
        """ Delay `node` if it is pure and not imaginary, otherwise emit it
            right away (after handling the nodes delayed for it). """
        if not node.is_imaginary() and node.is_pure():
            # this operation might never be emitted. only if it is really needed
            self.delay_emit(node)
            return
        # emit a now!
        self.pre_emit(node, True)
        self.mark_emitted(node)
        if not node.is_imaginary():
            op = node.getoperation()
            self.seen[op] = None
            self.append_to_oplist(op)

    def delay_emit(self, node):
        """ it has been decided that the operation might be scheduled later """
        # the node carries along everything that was delayed for it
        delayed = node.delayed or []
        if node not in delayed:
            delayed.append(node)
        node.delayed = None
        provides = node.provides()
        if len(provides) == 0:
            # nobody consumes this node: keep it for the final jump
            for n in delayed:
                self.delayed.append(n)
        else:
            # hand a copy of the delayed list to every consumer
            for to in node.provides():
                tnode = to.target_node()
                self.delegate_delay(tnode, delayed[:])
        # marked as emitted although it has not been appended to oplist
        self.mark_emitted(node)

    def delegate_delay(self, node, delayed):
        """ Chain up delays, this can reduce many more of the operations """
        if node.delayed is None:
            node.delayed = delayed
        else:
            # merge without duplicating entries
            delayedlist = node.delayed
            for d in delayed:
                if d not in delayedlist:
                    delayedlist.append(d)

    def mark_emitted(state, node, unpack=True):
        """ An operation has been emitted, adds new operations to the worklist
            whenever their dependency count drops to zero.
            Keeps worklist sorted (see priority)

            Note that the first parameter is named `state` instead of `self`.
            If `unpack` is true, ensure_args_unpacked is called for the
            operation of a non imaginary node.
        """
        worklist = state.worklist
        # iterate a copy: the edges are removed while iterating
        provides = node.provides()[:]
        for dep in provides: # COPY
            target = dep.to
            node.remove_edge_to(target)
            if not target.emitted and target.depends_count() == 0:
                # sorts them by priority
                # (scans from the end of the worklist towards the front)
                i = len(worklist)-1
                while i >= 0:
                    cur = worklist[i]
                    c = (cur.priority - target.priority)
                    if c < 0: # meaning itnode.priority < target.priority:
                        worklist.insert(i+1, target)
                        break
                    elif c == 0:
                        # if they have the same priority, sort them
                        # using the original position in the trace
                        if target.getindex() < cur.getindex():
                            worklist.insert(i+1, target)
                            break
                    i -= 1
                else:
                    # no suitable position found, it becomes the first entry
                    worklist.insert(0, target)
        node.clear_dependencies()
        node.emitted = True
        if not node.is_imaginary():
            op = node.getoperation()
            state.renamer.rename(op)
            if unpack:
                state.ensure_args_unpacked(op)
            state.post_emit(node)

    def delay(self, node):
        return False

    def has_more(self):
        """ True as long as the worklist is not empty. """
        return len(self.worklist) > 0

    def ensure_args_unpacked(self, op):
        # hook, does nothing here
        pass

    def post_emit(self, node):
        # hook, does nothing here
        pass

    def pre_emit(self, orignode, pack_first=True):
        """ Handle the nodes that were delayed for `orignode` before it is
            emitted. If `pack_first` is true, the delayed nodes needed by the
            operation of `orignode` are resolved first; whatever has not been
            seen afterwards is passed on (to the final delay list or to the
            consumers of the delayed node).
        """
        delayed = orignode.delayed
        if delayed:
            # there are some nodes that have been delayed just for this operation
            if pack_first:
                op = orignode.getoperation()
                self.resolve_delayed({}, delayed, op)
            for node in delayed:
                op = node.getoperation()
                if op in self.seen:
                    continue
                # NOTE(review): node was already dereferenced above, so this
                # check can never be false here
                if node is not None:
                    provides = node.provides()
                    if len(provides) == 0:
                        # add this node to the final delay list
                        # might be emitted before jump!
                        self.delayed.append(node)
                    else:
                        for to in node.provides():
                            tnode = to.target_node()
                            self.delegate_delay(tnode, [node])
            orignode.delayed = None