def make_random_fam_3_gate(levels, gate_name, gate_factory):
    """Creates a random family 3 gate using gate_factory.

    A family 3 gate takes in two inputs, at least one of which is from the
    level directly above it.

    Args:
        levels: a list of lists of gates, one corresponding to each level
            already created in the circuit. All levels must have the same
            length, and together they must hold at least two gates.
        gate_name: the name which the new gate will have.
        gate_factory: the callable used to generate the new gate. It is
            invoked as gate_factory(gate_name, inputs, negations).

    Returns:
        Whatever gate_factory returns for the chosen inputs and negations.
    """
    W = len(levels[0])
    assert all(len(level) == W for level in levels)
    # make sure that we have at least two possible inputs:
    assert W * len(levels) > 1
    # find the first input among the circuit objects in the ultimate level:
    input1_index = sr.randint(0, W - 1)
    input1 = levels[-1][input1_index]
    # find the second input among all available circuit objects. The flat
    # index counts backwards from the ultimate level: 0..W-1 address
    # levels[-1], W..2W-1 address levels[-2], and so on. A flat index equal to
    # input1_index therefore denotes input1 itself, so we redraw.
    num_candidates = len(levels) * W
    input2_index = sr.randint(0, num_candidates - 1)
    while input2_index == input1_index:
        input2_index = sr.randint(0, num_candidates - 1)
    input2_inp_index = input2_index % W
    # Explicit floor division keeps the level index an integer regardless of
    # which division semantics are in effect.
    input2_level_index = len(levels) - 1 - input2_index // W
    input2 = levels[input2_level_index][input2_inp_index]
    # create the gate:
    inputs = [input1, input2]
    negations = [sr.randbit() for _ in xrange(2)]
    return gate_factory(gate_name, inputs, negations)
def make_random_one_inp_and_const_int_gate(L, ultimate_level,
                                           penultimate_level, gate_name,
                                           circuit, gate_factory):
    """Builds a random gate with one input and one integer constant.

    The input is picked from ultimate_level with sr.randint, and the constant
    is sr.randint(0, L - 1).

    Args:
        L: bound for the constant; the constant is drawn with
            sr.randint(0, L - 1).
        ultimate_level: sequence the gate input is picked from.
        penultimate_level: only consulted by the availability assertion.
        gate_name: name handed to gate_factory.
        circuit: passed through to gate_factory unchanged.
        gate_factory: callable invoked as
            gate_factory(gate_name, input, const, circuit).

    Returns:
        Whatever gate_factory returns.
    """
    # This gate needs a single input, so at least one must be available.
    # NOTE(review): the check counts penultimate_level as well, but the input
    # is only ever drawn from ultimate_level -- confirm that is intended.
    num_available = len(ultimate_level) + len(penultimate_level)
    assert num_available > 0
    chosen_input = ultimate_level[sr.randint(0, len(ultimate_level) - 1)]
    constant = sr.randint(0, L - 1)
    return gate_factory(gate_name, chosen_input, constant, circuit)
def generate_pdf(self, min, max, ind_vars):
    """Returns a random address string built around a generated street name.

    The street name comes from the parent class' generate_pdf (called with
    the same arguments). A house number from spar_random.randint(1, 500) is
    put in front of it. When a draw of spar_random.randint(0, 100) is at most
    PERCENT_LIVING_IN_APT, an ", APT <n>" suffix is appended, with n drawn
    from spar_random.randint(1, HIGHEST_APT_NUMBER).
    """
    street = super(AddressDistribution, self).generate_pdf(min, max, ind_vars)
    number = spar_random.randint(1, 500)
    address = str(number) + " " + street
    lives_in_apt = (spar_random.randint(0, 100) <=
                    self.PERCENT_LIVING_IN_APT)
    if not lives_in_apt:
        return address
    apt = spar_random.randint(1, self.HIGHEST_APT_NUMBER)
    return address + ", APT " + str(apt)
def generate_double_range(self, minim, maxim, **kwargs):
    """Generates a (lower, upper) pair for a double sided range query.

    The width of the range is drawn between minim and maxim times the number
    of seconds separating zero_utc from upper_bound. The lower end is then
    drawn so that lower + width does not exceed that number of seconds.

    Returns:
        A tuple (lower, upper) with upper == lower + width.
    """
    total_seconds = (self.upper_bound - self.zero_utc).total_seconds()
    narrowest = int(minim * total_seconds)
    widest = int(maxim * total_seconds)
    width = spar_random.randint(narrowest, widest)
    start = spar_random.randint(1, total_seconds - width)
    return (start, start + width)
def generate(self, ind_vars=None):
    """Returns a random item that reflects the distribution of add() calls.

    The first time this is called it calls __counts_to_cdf to convert
    __counts to data structures that are more efficient for random number
    generation. The data structures are as follows:

    __total is the total number of times add() was called.

    __values[] is an array of the *distinct* values that were passed to
    add().

    __cum_counts is a running sum of the number of observations that
    correspond to __values. So __cum_counts[0] is the number of times
    __values[0] was passed to add(). __cum_counts[1] is the number of times
    either __values[0] or __values[1] was passed to add(), and so on.

    To generate values with the right distribution we draw a random number x
    with spar_random.randint(1, __total) and binary search for the first
    index in __cum_counts whose entry is >= x. The value at that index in
    __values is returned.

    Args:
        ind_vars: accepted for interface compatibility and not used. The
            default is None rather than a shared mutable dict.

    Returns:
        The element of __values selected by the random draw.
    """
    if self.__counts is not None:
        self.__counts_to_cdf()
    assert self.__counts is None
    assert self.__total > 0
    x = spar_random.randint(1, self.__total)
    idx = bisect.bisect_left(self.__cum_counts, x)
    return self.__values[idx]
def generate(self, ind_vars=None):
    """Returns one of the added items, selected by a random index.

    The index is drawn with spar_random.randint(0, __randint_bound) and used
    to look up _added_items. ind_vars is accepted but not used.
    """
    position = spar_random.randint(0, self.__randint_bound)
    return self._added_items[position]
def generate_less_than(self, minim, maxim, **kwargs):
    """Generates the bound for a less-than query.

    The result is drawn with spar_random.randint between minim and maxim
    times the number of seconds separating zero_utc from upper_bound.
    """
    span_seconds = (self.upper_bound - self.zero_utc).total_seconds()
    low = span_seconds * minim
    high = span_seconds * maxim
    return spar_random.randint(low, high)
def __format_value(self, age):
    """Turns an age into a date of birth.

    Starts at January 1st of (curr_year - age) and moves back by a number of
    days drawn with spar_random.randint(0, 365).

    Args:
        age: must satisfy 0 <= age < 140.

    Returns:
        A datetime.date.
    """
    assert age < 140
    assert age >= 0
    birth_year = self.curr_year - age
    new_years_day = datetime.date(birth_year, 1, 1)
    days_back = spar_random.randint(0, 365)
    return new_years_day - datetime.timedelta(days_back)
def generate_greater_than(self, minim, maxim, **kwargs):
    """Generates the bound for a greater-than query.

    An offset is drawn with spar_random.randint between half of minim and
    half of maxim times the number of seconds separating zero_utc from
    upper_bound. That offset is subtracted from the same number of seconds.

    Returns:
        The result truncated with int().
    """
    span_seconds = (self.upper_bound - self.zero_utc).total_seconds()
    offset = spar_random.randint((span_seconds * minim) / 2.0,
                                 (span_seconds * maxim) / 2.0)
    return int(span_seconds - offset)
def generate(self, *args):
    r"""Returns a random social-security number as a 9-digit string.

    The string has the format DDDDDDDDD, i.e. it matches the regular
    expression \d{9}: a 3-digit area number, a 2-digit group number and a
    4-digit serial number, each zero padded.

    Area numbers are drawn with spar_random.randint(0, 899) until
    _forbidden_area_number() stops rejecting the candidate. There is no
    guarantee that the returned number is unique.
    """
    area = 0
    while self._forbidden_area_number(area):
        area = spar_random.randint(0, 899)
    group = spar_random.randint(0, 99)
    serial = spar_random.randint(0, 9999)
    return '%03d%02d%04d' % (area, group, serial)
def generate(self, *args):
    """Generates a random fingerprint of random length.

    The length is drawn with spar_random.randint between
    MIN_FINGERPRINT_SIZE and MAX_FINGERPRINT_SIZE; the content comes from
    spar_random.bytes.

    Returns:
        A bytearray wrapping the generated bytes.
    """
    size = spar_random.randint(self.MIN_FINGERPRINT_SIZE,
                               self.MAX_FINGERPRINT_SIZE)
    return bytearray(spar_random.bytes(size))
def generate_less_than(self, minim, maxim, **kwargs):
    """Generates a date of birth for a less-than query.

    An age is obtained from age_dist.generate_greater_than (same arguments)
    and must satisfy 0 <= age < 140. The result is January 1st of
    (curr_year - age), moved back by a number of days drawn with
    spar_random.randint(int(365 * minim), int(365 * maxim)).

    Returns:
        A datetime.date.
    """
    age = self.age_dist.generate_greater_than(minim, maxim, **kwargs)
    assert age < 140
    assert age >= 0
    new_years_day = datetime.date(self.curr_year - age, 1, 1)
    days_back = spar_random.randint(int(365 * minim), int(365 * maxim))
    return new_years_day - datetime.timedelta(days_back)
def make_random_two_inp_gate(L, ultimate_level, penultimate_level, gate_name,
                             circuit, gate_factory):
    """Builds a random gate with two distinct inputs.

    The first input is picked from ultimate_level. The second is picked from
    ultimate_level followed by penultimate_level, addressed with one combined
    index; the draw is repeated until that index differs from the first
    input's index.

    Args:
        L: not used by this function.
        ultimate_level: sequence the first input is picked from.
        penultimate_level: additional candidates for the second input.
        gate_name: name handed to gate_factory.
        circuit: passed through to gate_factory unchanged.
        gate_factory: callable invoked as
            gate_factory(gate_name, input1, input2, circuit).

    Returns:
        Whatever gate_factory returns.
    """
    num_ultimate = len(ultimate_level)
    num_candidates = num_ultimate + len(penultimate_level)
    # Two inputs are required, so at least two must be available.
    assert num_candidates > 1
    first_index = sr.randint(0, num_ultimate - 1)
    first = ultimate_level[first_index]
    # Redraw until the combined index no longer points at the first input.
    while True:
        second_index = sr.randint(0, num_candidates - 1)
        if second_index != first_index:
            break
    if second_index >= num_ultimate:
        second = penultimate_level[second_index - num_ultimate]
    else:
        second = ultimate_level[second_index]
    return gate_factory(gate_name, first, second, circuit)
def generate_double_range(self, minim, maxim, **kwargs):
    """Generates a (lower, upper) pair of values for a range query.

    Like generate(), this converts __counts via __counts_to_cdf on first use.
    A width is drawn between max(1, __total * minim) (floored) and
    maxim * __total (ceiled). A lower weight is drawn so that lower + width
    stays within __total where possible. Both weights are located in
    __sorted_counts with bisect_left, and the values at those positions in
    __sorted_values are returned.

    Args:
        minim: lower fraction; requires 0 <= minim <= maxim.
        maxim: upper fraction; requires maxim <= 1.

    Returns:
        A tuple (value_at_lower, value_at_upper).
    """
    assert 0 <= minim <= maxim <= 1
    if self.__counts is not None:
        self.__counts_to_cdf()
    assert self.__counts is None
    assert self.__total > 0
    total = self.__total
    widest = math.ceil(maxim * total)
    narrowest = math.floor(max(1, total * minim))
    width = spar_random.randint(narrowest, widest)
    low_weight = spar_random.randint(1, max(1, total - width))
    high_weight = low_weight + width
    low_idx = bisect.bisect_left(self.__sorted_counts, low_weight)
    high_idx = bisect.bisect_left(self.__sorted_counts, high_weight)
    # bisect_left may return one-past-the-end; pull that back onto the last
    # entry.
    high_idx = min(high_idx, len(self.__sorted_counts) - 1)
    return (self.__sorted_values[low_idx], self.__sorted_values[high_idx])
def generate(self, *args):
    """Returns a random non-negative integer of random bit-length.

    A bit-length is drawn from spar_random.randint(0, 64). To skew a bit
    toward shorter numbers, a drawn length of 64 is only kept when
    spar_random.randint(1, 5000) yields 1; otherwise the length is redrawn
    from spar_random.randint(0, 63). Length 0 yields 0. Any other length
    yields an integer whose top bit is 1 and whose remaining bits come from
    spar_random.randbit().
    """
    bit_length = spar_random.randint(0, 64)
    if bit_length == 64 and spar_random.randint(1, 5000) != 1:
        bit_length = spar_random.randint(0, 63)
    if bit_length == 0:
        return 0
    # Start from the leading 1 bit and shift in the remaining random bits.
    value = 1
    for _ in xrange(bit_length - 1):
        value = value * 2 + spar_random.randbit()
    return value
def generate_less_than(self, minim, maxim, **kwargs):
    """Generates the bound for an SSN less-than query.

    Sets __total to 898999999.0, draws a value between
    max(1, __total * minim) (floored) and maxim * __total (ceiled), adds
    10**6 and formats the sum as a zero padded 9-digit string.
    """
    total = 898999999.0
    self.__total = total
    upper = math.ceil(maxim * total)
    lower = math.floor(max(1, total * minim))
    drawn = spar_random.randint(lower, upper)
    return '%09d' % (drawn + 10**6)
def generate_double_range(self, minim, maxim, **kwargs):
    """Generates an address (lower, upper) pair for a double sided range
    query.

    Both ends share one house number drawn with spar_random.randint(1, 500).
    The street names come from the parent class' generate_double_range. The
    upper end additionally gets the fixed suffix ', APT 20'.
    """
    number = spar_random.randint(1, 500)
    street_low, street_up = super(
        AddressDistribution, self).generate_double_range(
            minim, maxim, **kwargs)
    prefix = str(number) + " "
    return (prefix + street_low, prefix + street_up + ', APT 20')
def generate_double_range(self, minim, maxim, **kwargs):
    """Generates a date-of-birth (lower, upper) pair for a range query.

    An age pair is obtained from age_dist.generate_double_range. Each age is
    turned into January 1st of (curr_year - age), and the upper date is then
    pushed forward by a random number of days:

    * equal ages: spar_random.randint(1, int(min(365, maxim * db_size)))
    * otherwise:  spar_random.randint(int(365 * minim), int(365 * maxim))

    Args:
        minim, maxim: forwarded to the age distribution and used to size the
            day offset.
        **kwargs: must contain 'db_size'; forwarded to the age distribution.

    Returns:
        A tuple (lower_date, upper_date) of datetime.date objects.
    """
    db_size = kwargs['db_size']
    age_low, age_upper = self.age_dist.generate_double_range(
        minim, maxim, **kwargs)
    assert age_low <= age_upper
    if age_low != age_upper:
        # The older age gives the earlier birth year.
        earliest = datetime.date(self.curr_year - age_upper, 1, 1)
        latest_base = datetime.date(self.curr_year - age_low, 1, 1)
        days = spar_random.randint(int(365 * minim), int(365 * maxim))
    else:
        earliest = datetime.date(self.curr_year - age_low, 1, 1)
        latest_base = earliest
        days = spar_random.randint(1, int(min(365, maxim * db_size)))
    return (earliest, latest_base + datetime.timedelta(days))
def balance(self, desired_output):
    """Changes the negations on the inputs of this gate so that the gate
    yields the bit desired_output.

    If the gate already evaluates to desired_output, no negation is touched.
    Otherwise the negation of one randomly chosen input is flipped. In both
    cases the gate's value is then set to desired_output.

    Args:
        desired_output: the bit (True or False) the gate should yield.
    """
    assert desired_output == True or desired_output == False
    current_eval = self.evaluate()
    # if the gate does not currently evaluate to the desired_output, tweak
    # the negations in such a way as to force it to evaluate to the
    # desired_output. This can be done by flipping a single negation. Flipping
    # when the output is already right would defeat the purpose, so the flip
    # is guarded:
    if current_eval != desired_output:
        inp_ind = sr.randint(0, self.get_num_inputs() - 1)
        self._negate(inp_ind)
    self._set_value(desired_output)
def make_random_one_inp_and_const_inp_gate(L, ultimate_level,
                                           penultimate_level, gate_name,
                                           circuit, gate_factory):
    """Builds a random gate with one input and a constant input batch.

    The input is picked from ultimate_level with sr.randint. The constant is
    a ci.Input wrapping a single ib.IBMBatch of L bits drawn with
    sr.randbit().

    Args:
        L: number of random bits in the constant batch.
        ultimate_level: sequence the gate input is picked from.
        penultimate_level: only consulted by the availability assertion.
        gate_name: name handed to gate_factory.
        circuit: passed through to gate_factory unchanged.
        gate_factory: callable invoked as
            gate_factory(gate_name, input, const, circuit).

    Returns:
        Whatever gate_factory returns.
    """
    # This gate needs a single input, so at least one must be available.
    # NOTE(review): the check counts penultimate_level as well, but the input
    # is only ever drawn from ultimate_level -- confirm that is intended.
    num_available = len(ultimate_level) + len(penultimate_level)
    assert num_available > 0
    chosen_input = ultimate_level[sr.randint(0, len(ultimate_level) - 1)]
    bits = [sr.randbit() for _ in xrange(L)]
    const = ci.Input([ib.IBMBatch(bits)])
    return gate_factory(gate_name, chosen_input, const, circuit)
def make_random_two_inp_and_const_inp_gate(L, ultimate_level,
                                           penultimate_level, gate_name,
                                           circuit, gate_factory):
    """Builds a random gate with two distinct inputs and a constant input
    batch.

    The first input is picked from ultimate_level. The second is picked from
    ultimate_level followed by penultimate_level, addressed with one combined
    index; the draw is repeated until that index differs from the first
    input's index. The constant is a ci.Input wrapping a single ib.IBMBatch
    of L bits drawn with sr.randbit().

    Args:
        L: number of random bits in the constant batch.
        ultimate_level: sequence the first input is picked from.
        penultimate_level: additional candidates for the second input.
        gate_name: name handed to gate_factory.
        circuit: passed through to gate_factory unchanged.
        gate_factory: callable invoked as
            gate_factory(gate_name, input1, input2, const, circuit).

    Returns:
        Whatever gate_factory returns.
    """
    num_ultimate = len(ultimate_level)
    num_candidates = num_ultimate + len(penultimate_level)
    # Two inputs are required, so at least two must be available.
    assert num_candidates > 1
    first_index = sr.randint(0, num_ultimate - 1)
    first = ultimate_level[first_index]
    # Redraw until the combined index no longer points at the first input.
    while True:
        second_index = sr.randint(0, num_candidates - 1)
        if second_index != first_index:
            break
    if second_index >= num_ultimate:
        second = penultimate_level[second_index - num_ultimate]
    else:
        second = ultimate_level[second_index]
    bits = [sr.randbit() for _ in xrange(L)]
    const = ci.Input([ib.IBMBatch(bits)])
    return gate_factory(gate_name, first, second, const, circuit)
def test_regress(self):
    """Checks regression.regress against randomly generated data.

    For each of two trials, random coefficients A, B, C and random inputs are
    generated. Outputs are computed with FUNCTION_TO_REGRESS_TEMPLATE plus
    sr.random() noise. The fitted function must then predict a fresh random
    point to within 1.0 of the template's value, and its r-squared over the
    training data must exceed .8.
    """
    num_inputs = 2
    num_datapoints = sr.randint(5, 1000)
    acceptable_error = 1.0
    num_trials = 2
    for _trial in xrange(num_trials):
        A = float(sr.randint(1, 100))
        B = float(sr.randint(1, 5))
        C = float(sr.randint(1, 100))
        # inputs[i][j] is the value of input i at data point j.
        inputs = []
        for _inp in xrange(num_inputs):
            inputs.append(
                [sr.randint(1, 200) for _point in xrange(num_datapoints)])
        outputs = []
        for point in xrange(num_datapoints):
            sample = [column[point] for column in inputs]
            outputs.append(
                FUNCTION_TO_REGRESS_TEMPLATE(sample, A, B, C) + sr.random())
        function_guess = regression.regress(
            function_to_regress=FUNCTION_TO_REGRESS,
            outputs=outputs,
            inputs=inputs)
        test_inputs = [sr.randint(1, 100) for _inp in xrange(num_inputs)]
        predicted = function_guess.function(test_inputs)
        expected = FUNCTION_TO_REGRESS_TEMPLATE(test_inputs, A, B, C)
        self.assertTrue(abs(predicted - expected) < acceptable_error)
        rsquared = function_guess.get_rsquared(inputs, outputs)
        self.assertTrue(rsquared > .8)
def generate_double_range(self, minim, maxim, **kwargs):
    """Generates an SSN (lower, upper) pair for a double sided range query.

    Sets __total to 899999999.0. The width of the range is the difference
    between maxim * __total (ceiled) and max(1, __total * minim) (floored).
    The lower end is drawn with spar_random.randint(1, __total - width).

    Returns:
        A tuple of two zero padded 9-digit strings (lower, upper).
    """
    total = 899999999.0
    self.__total = total
    widest = math.ceil(maxim * total)
    narrowest = math.floor(max(1, total * minim))
    width = widest - narrowest
    low = spar_random.randint(1, total - width)
    high = low + width
    return ('%09d' % low, '%09d' % high)
def generate(self, *args):
    """Returns a random identifier not returned by this object before.

    Candidates are drawn with spar_random.randint(0, _upper_bound) until one
    is found that is not yet in _returned; it is then recorded there. Heed
    the class-level documentation about degenerate behavior as the supply of
    possible return values shrinks over time.

    A warning is sent to the logging module whenever percent_remaining is
    below 0.5 after the draw.
    """
    while True:
        candidate = spar_random.randint(0, self._upper_bound)
        if candidate not in self._returned:
            break
    self._returned.add(candidate)
    # Start warning when less than half of values remain --
    # performance will start to get slow!
    if self.percent_remaining < 0.5:
        logging.warning(
            "RandIntWithoutReplacement object now "
            "half exhaused (less than half of possible "
            "outputs remain.) Performance will start to "
            "degrade.")
    return candidate
def generate_greater_than(self, record_set_size, db_size): '''Returns a single value which is the lower bound of a randomly generated range''' #find the nearest bit_length that has a count close to record_set_size cum_count = 0 density = self.__bucket_density(db_size) bit_length = None for b in xrange(64, 0, -1): cum_count += density[b] * ((2**b) - (2**(b - 1))) #either it is close enough or the count has gotten #bigger than the record_set_size and we need to stop if self.__close_enough(cum_count,record_set_size)\ or cum_count > record_set_size: bit_length = b break #this checks to see for the case where the record_set_size is #somewhere in the bitlength but the granularity is too fine so we #need for further refine where we are generating our random range #from if bit_length is None: raise FooInputs if not self.__close_enough(cum_count, record_set_size): records_in_range = record_set_size for b in xrange(64, bit_length, -1): records_in_range -= self.__prob_b(b) * db_size records_in_range = self.__prob_b( bit_length) * db_size - records_in_range range = int(records_in_range * (2**bit_length-2**(bit_length-1)) / \ (self.__prob_b(bit_length)*db_size)) else: range = 0 #Generate random value of that length if bit_length == 0: return 1 else: b = bit_length #generate a random start offset that is less than the size of the bucket #minus the randomly generated range. We only generate half of this value #randomly and then multiply by 2 because randint cannot handle values #large than 2**63 random_start_half = spar_random.randint( 1, (2**b - 2**(b - 1) - range) / 2) random_start = random_start_half * 2 #add that offset to the lower end of the bucket as well as the range value return random_start + 2**(b - 1) + range
def generate_pdf(self, minim, maxim, *ind_vars):
    '''
    Returns a value with the given record set size (expressed as a
    percentage of database size). It can be db_size agnostic because minim
    and maxim are normalized.

    For every bit length b in 0..64 the quantity
    __prob_b(b) * 10**9 / (2**b - 2**(b - 1)) is compared against
    [minim * 10**9, maxim * 10**9]. The largest b that falls inside that
    interval is used, and a random value with that many bits is drawn. If no
    bit length qualifies, 0 is returned.
    '''
    lowest = minim * 10**9
    highest = maxim * 10**9
    matches = []
    for b in xrange(65):
        expected = self.__prob_b(b) * 10**9 / (2**b - 2**(b - 1))
        if lowest <= expected <= highest:
            matches.append(b)
    if not matches:
        return 0
    bit_len = matches[-1]
    return spar_random.randint(2**(bit_len - 1), 2**(bit_len) - 1)
def _dob_to_last_updated(self, dob):
    """Draws a random last-updated offset that is not before dob.

    dob is combined with self.midnight into a datetime. The later of that
    datetime and zero_utc is the lower bound. A number of seconds is drawn
    with spar_random.randint between 0 and the distance from the lower bound
    to upper_bound.

    Returns:
        The drawn seconds as an int. When the lower bound is the date of
        birth rather than zero_utc, the seconds between zero_utc and the
        date of birth are added first.
    """
    dob_datetime = datetime.datetime.combine(dob, self.midnight)
    lower_bound = max(self.zero_utc, dob_datetime)
    seconds_in_delta = (self.upper_bound - lower_bound).total_seconds()
    random_seconds = spar_random.randint(0, seconds_in_delta)
    if lower_bound != self.zero_utc:
        dob_offset = (dob_datetime - self.zero_utc).total_seconds()
        return int(random_seconds + dob_offset)
    return int(random_seconds)
def add_single_alarmword(self, alarmword):
    """
    Adds a single word at a random location in the text.

    Note: a space is added both before and after the word, even if this
    makes a double space or a space between a word and punctuation.
    Used for adding alarmwords to text. May only be called once per object.
    """
    assert not self._alarmwords_added, "Cannot add alarmwords more than once"

    # Derive the upper-cased form and its stem for the internal lists.
    word_upper = alarmword.upper()
    stem = spar_stemming.stem_word(word_upper)

    # Keep the insertion away from the first and last six list entries.
    end_first_trigram = 6
    begin_last_trigram = len(self.word_list) - 6
    insert_point = spar_random.randint(end_first_trigram, begin_last_trigram)

    # Why force a leading and a trailing space? We *could* try to be
    # smarter and insert a single space to keep natural English spacing,
    # but getting that wrong may put two words together with no space in
    # between. That would be an extremely subtle bug where the aggregators
    # and the databases disagree about which words are in a sentence. Extra
    # spaces, on the other hand, should not break anything (and are even to
    # be expected in real human-generated text).

    # Note: the order of these entries is fragile. Every entry is inserted
    # at the same index, so each one lands in front of the previous one.
    pieces = (
        (' ', None, ' '),
        (alarmword, stem, word_upper),
        (' ', None, ' '),
    )
    for new_word, new_stem, new_upper in pieces:
        self.word_list.insert(insert_point, new_word)
        self.stem_list.insert(insert_point, new_stem)
        self.upper_word_list.insert(insert_point, new_upper)

    self._reset_helpers()
    self._alarmwords_added = True
    self.alarmwords = [alarmword]
    self.alarmword_distances = None
def balance(self, desired_output):
    """Changes the negations on the inputs of this gate so that the gate
    yields the bit desired_output.

    Nothing is negated when the gate already evaluates to desired_output.
    Otherwise:

    * desired_output True: every input whose negated value is False gets its
      negation flipped, since an AND needs all inputs to be True.
    * desired_output False: the negation of one randomly chosen input is
      flipped.

    In all cases the gate's value is finally set to desired_output.
    """
    assert desired_output == True or desired_output == False
    current_eval = self.evaluate()
    if current_eval != desired_output:
        if desired_output:
            # Getting an AND to evaluate to True is the harder direction:
            # every single input must evaluate to True.
            for index in xrange(self.get_num_inputs()):
                if self._get_value_with_negation(index) == False:
                    self._negate(index)
        else:
            # The gate currently evaluates to True; flipping any single
            # negation makes it evaluate to False.
            flipped = sr.randint(0, self.get_num_inputs() - 1)
            self._negate(flipped)
    self._set_value(desired_output)
def test_balancing_randomized(self):
    """
    Test to determine that balancing forces the desired output.

    Runs 100 randomized cases. Each builds a StealthOrGate over 2 to 20
    random input wires with random negations, balances it to a random
    desired output, and checks that the gate evaluates to that output.
    """
    num_tests = 100
    min_num_inputs = 2
    max_num_inputs = 20
    for _test in xrange(num_tests):
        num_inputs = sr.randint(min_num_inputs, max_num_inputs)
        inputs = []
        for _wire in xrange(num_inputs):
            inputs.append(sw.StealthInputWire("wire", bool(sr.randbit())))
        negations = []
        for _wire in xrange(num_inputs):
            negations.append(bool(sr.randbit()))
        gate = sgo.StealthOrGate("or_gate", inputs, negations)
        desired_output = bool(sr.randbit())
        gate.balance(desired_output)
        self.assertEqual(gate.evaluate(), desired_output)