def clear(self) -> None:
    """Reinitialise the cache table and zero the search-statistics counters.

    After the call ``self.cache`` holds exactly three keys, inserted in
    the order 1, 0, -1:

    * 1  -- a finished entry with cost 0 whose sequence is a single "nop".
    * 0  -- built from the CPU profile's "zero" cost.
    * -1 -- built from the CPU profile's "negate" cost.

    When the profile has no cost for "zero" or "negate", that entry is
    stored with ``inf_cost`` for both cost fields and ``None`` in place of
    an instruction list.
    """
    # Dict keys keep insertion order, so 1 ends up ahead of 0 and -1.
    nop = Instruction("nop", 0, self.cpu_profile.costs["nop"])
    self.cache: Dict[int, Tuple[float, float, bool, Optional[List[Instruction]]]] = {
        1: (0, 0, True, [nop]),
    }

    for value, op_name in ((0, "zero"), (-1, "negate")):
        op_cost: float = self.cpu_profile.costs.get(op_name, inf_cost)
        sequence: Optional[List[Any]] = None
        if op_cost != inf_cost:
            sequence = [Instruction(op_name, 0, op_cost)]
        self.cache[value] = (op_cost, op_cost, True, sequence)

    # Counters that track how the cache performs during a search.
    self.hits_exact = self.hits_partial = self.misses = 0
def search_negate(
    self,
    n: int,
    upper: float,
    lower: float,
    instrs: List[Instruction],
    candidate_instrs: List[Instruction],
) -> Tuple[float, List[Instruction]]:
    """Try to reach a negative *n* as "multiply by ``-n``, then negate".

    The attempt is made only when *n* is negative and
    ``self.cpu_model.can_negate()`` is true. In every other case, and
    whenever the attempt is not cheaper than *upper*, the incoming
    ``(upper, candidate_instrs)`` pair is returned untouched.

    Args:
        n: multiplier being searched for.
        upper: cost that a new sequence has to beat.
        lower: cost already incurred; the negate cost is added to it for
            the cutoff test.
        instrs: not read by this method (presumably present so that all
            search methods share one signature -- confirm).
        candidate_instrs: best sequence known so far.

    Returns:
        ``(cost, instructions)``: either the improved cost with a new
        instruction list ending in "negate", or ``(upper,
        candidate_instrs)`` when nothing cheaper was found.
    """
    if n < 0 and self.cpu_model.can_negate():
        if self.debug:
            self.debug_msg(f"Looking at cached positive value {-n} of {n}")

        negate_cost = self.op_costs["negate"]
        lower += negate_cost
        if lower >= upper:
            # We have another cutoff
            if self.debug:
                self.debug_msg(
                    f"**alpha cutoff in negate for {n} in cost {lower} >= {upper}"
                )
            return upper, candidate_instrs

        _, cache_upper, _, positive_instrs = self.mult_cache[-n]
        if cache_upper == inf_cost:
            # Nothing usable cached for -n: fall back to the binary method.
            cache_upper, positive_instrs = binary_sequence_inner(self, -n)

        cache_upper += negate_cost
        if cache_upper < upper:
            if self.debug:
                self.debug_msg(
                    f"Negation {n} update {cache_upper} < {upper} ...")
            # Build a NEW list instead of appending in place: the list we
            # hold may be the very object the cache handed out for -n, and
            # an in-place append would tack "negate" onto that entry too.
            negated_instrs = [
                *positive_instrs,
                Instruction("negate", 0, negate_cost),
            ]
            return cache_upper, negated_instrs

    return upper, candidate_instrs
def try_shift_op_factor(
    self,
    n: int,  # Number we are seeking
    factor: int,  # factor to try to divide "n" by
    op: str,  # operation after "shift"; either "add" or "subtract"
    shift_amount: int,  # shift amount used in shift operation
    upper: float,  # maximum allowed cost for an instruction sequence.
    lower: float,  # cost of instructions seen so far
    instrs: List[Instruction],  # We build on this.
    candidate_instrs: List[Instruction],  # Best sequence seen so far, if any.
) -> Tuple[float, List[Instruction]]:
    """Try to reach *n* as ``(n // factor)`` followed by a shift and *op*.

    Nothing is attempted unless *factor* divides *n* exactly. When it
    does, the quotient is searched with ``self.alpha_beta_search``. If the
    result is cheap enough, a "shift" by *shift_amount* and an *op*
    instruction flagged ``FACTOR_FLAG`` are added after it, and the cache
    entry for *n* is updated with the new upper bound and sequence.

    Note that the incoming *lower* is not read: it is recomputed from
    ``instruction_sequence_cost(instrs)`` (see the FIXME below).

    Returns:
        ``(cost, instructions)``: the improved cost and its sequence, or
        the incoming ``(upper, candidate_instrs)`` when no improvement
        was found.
    """
    if (n % factor) != 0:
        return upper, candidate_instrs

    shift_cost = self.shift_cost(shift_amount)
    op_cost = self.op_costs[op]
    shift_op_cost = op_cost + shift_cost

    # FIXME: figure out why lower != instruction_sequence_cost(instrs)
    lower = instruction_sequence_cost(instrs) + shift_op_cost
    if lower < upper:
        m = n // factor
        self.debug_msg(f"Trying factor {factor}...")
        try_cost, try_instrs = self.alpha_beta_search(
            m, lower=lower, limit=(upper - (lower - shift_op_cost)))
        if try_cost < upper - lower:
            # Build a NEW list rather than appending in place: the search
            # can hand back the list object held in the cache for "m", and
            # appending to it would graft these two instructions onto the
            # entry for "m" as well as the one for "n".
            try_instrs = [
                *try_instrs,
                Instruction("shift", shift_amount, shift_cost),
                Instruction(op, FACTOR_FLAG, op_cost),
            ]
            try_cost += shift_op_cost
            self.debug_msg(
                f"*update {n} using factor {factor}; cost {try_cost} < previous limit {upper}"
            )
            self.mult_cache.update_field(n, upper=try_cost, instrs=try_instrs)

            # Upper is the cost for the entire sequence; the remaining
            # cost is in "lower".
            upper = try_cost
            candidate_instrs = try_instrs

    return upper, candidate_instrs
def make_odd(
    self, n: int, cost: float, result: List[Instruction]
) -> Tuple[int, float, int]:
    """Cover the low-order 0's of *n* with one "shift" instruction.

    ``consecutive_zeros(n)`` supplies the shift amount and the reduced
    number (presumably the count of trailing zero bits and *n* with them
    removed -- confirm against that helper). A zero shift amount leaves
    *cost* and *result* alone.

    Machines that shift only one place at a time, or whose shift time
    depends on the amount, are accounted for inside ``self.shift_cost``.

    Returns:
        ``(reduced n, cost including any shift, shift amount)``.
    """
    zero_count, odd_part = consecutive_zeros(n)
    if not zero_count:
        return (odd_part, cost, zero_count)

    cost_of_shift = self.shift_cost(zero_count)
    result.append(Instruction("shift", zero_count, cost_of_shift))
    return (odd_part, cost + cost_of_shift, zero_count)
def try_plus_offset(
    self,
    n: int,  # Number we are seeking
    increment: int,  # +1 or -1 for now
    limit: float,  # maximum allowed cost for an instruction sequence.
    lower: float,  # cost of instructions seen so far
    instrs: List[Instruction],  # Not read here.
    candidate_instrs: List[Instruction],  # The best current candidate sequence.
    op_flag,
) -> Tuple[float, List[Instruction]]:
    """Try to reach *n* from its neighbor ``n + increment`` with one add/subtract.

    A negative *increment* means the neighbor is below *n*, so an "add"
    gets back to *n*; otherwise a "subtract" is used. The neighbor's cost
    comes from the cache when its entry is marked finished, and from
    ``self.alpha_beta_search`` otherwise. If neighbor cost plus the
    operation cost beats *limit*, the cache entry for *n* is updated and
    the new sequence becomes the candidate.

    Args:
        op_flag: passed through as the second argument of the
            add/subtract ``Instruction``.

    Returns:
        ``(cost, instructions)``: the improved pair, or the incoming
        ``(limit, candidate_instrs)`` when no improvement was found.
    """
    op_str = "add" if increment < 0 else "subtract"
    op_cost = self.op_costs[op_str]
    try_lower = lower + op_cost

    if try_lower < limit:
        n1 = n + increment
        _, neighbor_cost, finished, neighbor_instrs = self.mult_cache[n1]
        if not finished:
            if self.debug:
                which = "lower" if n1 < n else "upper"
                self.debug_msg(f"Trying {which} neighbor {n1} of {n}...")
            neighbor_cost, neighbor_instrs = self.alpha_beta_search(
                n1, try_lower, limit=limit)

        try_cost = neighbor_cost + op_cost
        if try_cost < limit:
            self.debug_msg(
                f"*neighbor {n} update cost {try_cost}, previously {limit}."
            )
            limit = try_cost
            # Build a NEW list rather than appending in place: the
            # neighbor's list may be the object held in the cache for
            # "n1", and an in-place append would change that entry too.
            neighbor_instrs = [
                *neighbor_instrs,
                Instruction(op_str, op_flag, op_cost),
            ]
            lower = min(self.mult_cache[n][0], try_cost)
            self.mult_cache.insert_or_update(n, lower, try_cost, False,
                                             neighbor_instrs)
            candidate_instrs = neighbor_instrs

    return limit, candidate_instrs
def alpha_beta_search(self, n: int, lower: float,
                      limit: float) -> Tuple[float, List[Instruction]]:
    """Alpha-beta search for an instruction sequence that multiplies by *n*.

    n: the (sub-)multiplier we are seeking at this point in the search.
       Note that it is *not* necessarily the initial multiplier sought.

    lower: the cost used up until this point for the top-level searched
       multiplier. This has to be added onto the cost for *n* when
       compared against *limit* in order for multiplication via *n* to be
       considered a better sequence.

    limit: the cost of the best sequence of instructions seen so far. It
       can be lowered as better sequences are found.

    Return points, as written below:
      * a finished cache entry for *n*: ``(cached upper cost, cached
        instructions)``, with an empty list substituted when n == 1;
      * the shift alone already exceeds *limit*: ``(inf_cost, [])``;
      * otherwise: ``(limit, candidate_instrs)`` after running every
        function in ``self.search_methods``.

    The original author's note on the result: the cost returned is meant
    to be that of the total sequence rather than of computing "n" alone;
    subtracting the "lower" value *on entry* then gives the cost of
    computing "n".
    """
    # NOTE(review): the trailing 2 / -2 arguments presumably indent and
    # dedent the debug output; self.dedent() at the end would then pair
    # with this opening call -- confirm against debug_msg.
    self.debug_msg(
        f"alpha-beta search for {n} in at most {limit-lower} = max alotted cost: {limit}, incurred cost {lower}",
        2,
    )

    # FIXME: should be done in caller?
    cache_lower, cache_upper, finished, cache_instrs = self.mult_cache[n]
    if finished:
        self.debug_msg(
            f"alpha-beta using cache entry for {n} cost: {cache_upper}", -2)
        # NOTE(review): cache_instrs is returned as-is; if the cache hands
        # out its stored list rather than a copy, callers that append to
        # the result also change the cache entry -- confirm.
        return cache_upper, [] if n == 1 else cache_instrs

    # Keep the caller's value: it is the key used for the cache updates below.
    orig_n = n
    n, need_negation = self.need_negation(n)

    assert n > 0

    instrs: List[Instruction] = []
    # make_odd is called with a starting cost of 0, so the second value it
    # returns is just the cost of the shift (0 when no shift is needed).
    m, shift_cost, shift_amount = self.make_odd(n, 0, instrs)

    lower += shift_cost
    if lower > limit:
        self.debug_msg(
            f"**alpha cutoff after shift for {n} incurred {lower} > {limit} alotted",
            -2,
        )
        return inf_cost, []

    # Make "m" negative if "n" was and search for that directly
    if need_negation:
        m = -m

    candidate_instrs: List[Instruction] = []

    # If we have caching enabled, m != 1 since caching will catch earlier.
    # However for safety and extreme cases where we don't have caching,
    # we will test here.
    if m in (-1, 0, 1):
        # Take cost and instructions straight from the cache entry for m.
        _, limit, _, candidate_instrs = self.mult_cache[m]
        limit += shift_cost
        lower = limit
    else:
        # FIXME: might be "limit - shift" cost, but possibly a bug in
        # add/subtract one will prevent a needed cost update when this
        # happens. Investigate and fix.
        search_limit = limit
        # Each search method receives the best bound and candidate found
        # so far and returns a (possibly improved) pair.
        for fn in self.search_methods:
            candidate_upper, candidate_instrs = fn(self, m, search_limit,
                                                   lower, instrs,
                                                   candidate_instrs)
            if candidate_upper + shift_cost < search_limit:
                search_limit = candidate_upper
                self.debug_msg(
                    f"*alpha-beta lowering limit of {m} cost to {search_limit} via search {fn.__name__}"
                )
                pass
            pass

        if search_limit < limit:
            limit = search_limit + shift_cost
        pass

    if candidate_instrs:
        if shift_amount:
            # NOTE(review): in the "m in (-1, 0, 1)" branch candidate_instrs
            # came directly from self.mult_cache[m]; this in-place append
            # would then also alter that cached list unless the cache
            # returns copies -- confirm.
            candidate_instrs.append(
                Instruction("shift", shift_amount, shift_cost))
        # The final cost is recomputed from the sequence itself and stored
        # as both bounds of a finished entry.
        limit = instruction_sequence_cost(candidate_instrs)
        self.mult_cache.insert_or_update(orig_n, limit, limit, True,
                                         candidate_instrs)
    else:
        # Nothing found in this call: fall back to whatever the cache held
        # for n on entry.
        candidate_instrs = cache_instrs
        if not candidate_instrs:
            self.debug_msg(
                f"**cutoffs before anything found for {orig_n}; check/update instructions used to {limit - lower}"
            )
        self.mult_cache.update_field(orig_n, lower=limit - lower)

    self.dedent()
    return limit, candidate_instrs
def binary_sequence_inner(
        self: MultConstClass,
        n: int) -> Tuple[float, List[Instruction]]:  # noqa: C901
    """Build an instruction sequence for *n* using the binary method.

    The sequence is assembled back to front: each loop iteration strips
    low-order 0's with a shift (``make_odd``) and then clears or extends
    the low-order 1's with an "add" or a "subtract". The list is reversed
    once at the end. Whenever the remaining value has a cache entry with a
    finite upper cost, that entry's instructions are spliced in and the
    loop stops.

    For a negative *n* the positive value is worked on and either a
    reverse subtract (when ``self.cpu_model.subtract_can_negate()``) or a
    final "negate" supplies the sign.

    Before returning, the finished sequence is handed to
    ``self.mult_cache.update_sequence_partials``.

    Returns:
        ``(total cost, instruction list)``.
    """

    def append_instrs(cache_instrs: List[Instruction], bin_instrs,
                      cache_upper: float) -> float:
        # bin_instrs is being built in reverse order, so the cached
        # sequence has to go in reversed. Use a reversed *view* instead of
        # cache_instrs.reverse(): the in-place form would flip the list
        # that the cache handed out, on every hit.
        bin_instrs.extend(reversed(cache_instrs))
        return cache_upper

    if n == 0:
        return (self.op_costs["zero"],
                [Instruction("zero", 0, self.op_costs["zero"])])

    orig_n = n
    n, need_negation = self.need_negation(n)

    assert n > 0

    bin_instrs: List[Instruction] = []
    cost: float = 0  # total cost of sequence
    while n > 1:
        if need_negation:
            _, cache_upper, _, cache_instrs = self.mult_cache[-n]
            if cache_upper < inf_cost:
                # The negative value itself is cached: the sign is done.
                cost += append_instrs(cache_instrs, bin_instrs, cache_upper)
                need_negation = False
                break

        _, cache_upper, _, cache_instrs = self.mult_cache[n]
        if cache_upper < inf_cost:
            # If we were given a positive number, then we are done.
            # However if we were given a negative number, then from the
            # test above we know the negative version is not in the cache.
            # So we still have to continue in order to potentially find
            # a shorter sequence using a subtract.
            if not (need_negation and self.cpu_model.subtract_can_negate()):
                cost += append_instrs(cache_instrs, bin_instrs, cache_upper)
                break

        n, cost, _ = self.make_odd(n, cost, bin_instrs)

        if n == 1:
            break

        # Handle low-order 1's via "adds", and also "subtracts" if
        # subtracts are available.
        one_run_count, _ = consecutive_ones(n)
        try_reverse_subtract = (need_negation
                                and self.cpu_model.subtract_can_negate())
        if self.cpu_model.can_subtract() and (one_run_count > 2
                                              or try_reverse_subtract):
            if try_reverse_subtract:
                # The reverse subtract also takes care of the sign.
                cost += self.add_instruction(bin_instrs, "subtract",
                                             REVERSE_SUBTRACT_1)
                need_negation = False
            else:
                cost += self.add_instruction(bin_instrs, "subtract", OP_R1)
            n += 1
        else:
            cost += self.add_instruction(bin_instrs, "add", OP_R1)
            n -= 1

    bin_instrs.reverse()

    if need_negation:
        cost += self.add_instruction(bin_instrs, "negate", OP_R1)

    self.debug_msg(
        f"binary method for {orig_n} = {bin2str(orig_n)} has cost {cost}")
    self.mult_cache.update_sequence_partials(bin_instrs)

    return (cost, bin_instrs)
pass else: print(f"unknown op {instr.op}") cost += instr.cost self.insert_or_update(n, 0, cost, False, instrs[0:i + 1]) # noqa pass if __name__ == "__main__": multcache = MultCache() multcache.check() # Note: dictionaries keys in Python 3.8+ are in given in insertion order. assert list(multcache.keys()) == [1, 0, -1 ], "We should have at least -1, 0 and 1" multcache.check() multcache.insert(0, 1, 1, True, [Instruction("zero", 0, 1)]) multcache.check() multcache.insert_or_update(1, 0, 0, True, [Instruction("nop", 0, 0)]) multcache.check() instrs = [ Instruction("shift", 4), Instruction("add", 1), Instruction("shift", 2), Instruction("subtract", FACTOR_FLAG), Instruction("negate", 0), ] multcache.update_sequence_partials(instrs) multcache.check() from mult_by_const.io import dump
def add_instruction(
    self, bin_instrs: List[Instruction], op_name: str, op_flag: int
) -> float:
    """Append an *op_name* instruction to *bin_instrs* and return its cost.

    The cost is looked up in ``self.op_costs``; *op_flag* is passed
    through as the instruction's second argument.
    """
    op_cost = self.op_costs[op_name]
    new_instruction = Instruction(op_name, op_flag, op_cost)
    bin_instrs.append(new_instruction)
    return op_cost