def test_choice_distribution(self):
    """Check that ``choice_distribution`` draws valid, correctly weighted picks.

    A single draw must be one of the candidates. The RNG is then restored
    from the state saved in ``random_state.json`` so the following 100
    draws are reproducible, and each candidate's count must lie strictly
    within ``tolerance`` of its expected count (100 * probability).
    """
    from collections import Counter

    from faker.utils.distribution import choice_distribution

    a = ('a', 'b', 'c', 'd')
    p = (0.5, 0.2, 0.2, 0.1)

    sample = choice_distribution(a, p)
    self.assertIn(sample, a)

    with open(os.path.join(TEST_DIR, 'random_state.json'), 'r') as fh:
        random_state = json.load(fh)
    # JSON arrays load as lists; element 1 is converted back to a tuple
    # before the state is handed to random.setstate().
    random_state[1] = tuple(random_state[1])
    random.setstate(random_state)

    sample_size = 100
    tolerance = 5
    populations = Counter(
        choice_distribution(a, p) for _ in range(sample_size))

    for element, probability in zip(a, p):
        expected = sample_size * probability
        observed = populations[element]  # 0 when the element was never drawn
        self.assertTrue(
            expected + tolerance > observed > expected - tolerance,
            '%r drawn %d times, expected within %d of %s'
            % (element, observed, tolerance, expected))
def horse_dob(self):
    '''Return a date of birth for a live horse as a ``YYYY-MM-DD`` string.

    Horses are assumed to live up to 40 years, with the distribution
    favouring younger horses. The birth year and then the birth month are
    drawn with ``choice_distribution``; the day is drawn uniformly from the
    days of that month.

    :return: the date of birth formatted with ``str()`` (``YYYY-MM-DD``)
    '''
    this_year = date.today().year
    # Candidate birth years, youngest first: this year back to 39 years ago.
    years = list(range(this_year, this_year - 40, -1))
    # note that these do not add up to 100 and they are pure guesswork
    year_weights = [
        0.06, 0.06, 0.06, 0.06,
        0.05, 0.05, 0.05,
        0.04, 0.04, 0.04,
        0.03, 0.03, 0.03, 0.03,
        0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
        0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01,
        0.005, 0.005, 0.005,
        0.001, 0.001, 0.001, 0.001, 0.001,
        0.001, 0.001, 0.001, 0.001, 0.001,
    ]
    # get a year - distributed around average horse age
    year = choice_distribution(years, year_weights)

    # get a month - in northern hemisphere most horses are born in spring
    # TODO: handle date of birth for southern hemisphere
    months = list(range(1, 13))
    # NOTE(review): June's weight (0.5) is much larger than its neighbours
    # and may be a typo for 0.05 - left unchanged, confirm with the author.
    month_weights = [
        0.2, 0.2, 0.2, 0.2, 0.1, 0.5,
        0.2, 0.1, 0.01, 0.01, 0.01, 0.01,
    ]
    month = choice_distribution(months, month_weights)

    # get day of birth: itermonthdates() also yields the days of adjacent
    # months needed to complete the first and last week, so keep only the
    # days that fall in the chosen month.
    days_in_month = [
        day for day in calendar.Calendar().itermonthdates(year, month)
        if day.month == month
    ]
    dob = random.choice(days_in_month)
    # NOTE(review): when the current year is drawn, nothing here stops the
    # result from being later than today - confirm whether that matters.

    # return in format YYYY-MM-DD
    return str(dob)
def test_choice_distribution(self):
    """Check that ``choice_distribution`` draws valid, correctly weighted picks.

    A single draw must be one of the candidates. The RNG is then restored
    from the state saved in ``random_state.json`` so the following 100
    draws are reproducible, and each candidate's count must lie strictly
    between ``100 * probability - tolerance`` and
    ``100 * probability + tolerance``.
    """
    a = ('a', 'b', 'c', 'd')
    p = (0.5, 0.2, 0.2, 0.1)

    sample = choice_distribution(a, p)
    self.assertIn(sample, a)

    with open(os.path.join(TEST_DIR, 'random_state.json'), 'r') as fh:
        random_state = json.load(fh)
    # JSON arrays load as lists; element 1 is converted back to a tuple
    # before the state is handed to random.setstate().
    random_state[1] = tuple(random_state[1])
    random.setstate(random_state)

    samples = [choice_distribution(a, p) for _ in range(100)]

    tolerance = 5
    for element, probability in zip(a, p):
        upper = 100 * probability + tolerance
        lower = 100 * probability - tolerance
        population = samples.count(element)
        self.assertTrue(
            upper > population > lower,
            '%r drawn %d times, expected strictly between %s and %s'
            % (element, population, lower, upper))
def age(cls, minor=False):
    """Return a random age.

    :param minor: when true, return ``cls.random_int(0, 20)``; otherwise
        pick a range from ``cls.age_ranges_US`` weighted by
        ``cls.age_freq_US`` and return ``random.randint`` over that range.
    """
    if not minor:
        # Pick an age bracket first, then an age inside that bracket.
        bracket = choice_distribution(cls.age_ranges_US, cls.age_freq_US)
        return random.randint(*bracket)
    # kids' ages are pretty evenly distributed..
    return cls.random_int(0, 20)
def random_element(self, elements=('a', 'b', 'c')):
    """
    Pick one element at random from the passed object.

    When `elements` is a dictionary, its keys are the candidates and its
    values are used as the weights. For example::

        random_element({"{{variable_1}}": 0.5, "{{variable_2}}": 0.2,
                        "{{variable_3}}": 0.2, "{{variable_4}}": 0.1})

    will have the following distribution:
        * `variable_1`: 50% probability
        * `variable_2`: 20% probability
        * `variable_3`: 20% probability
        * `variable_4`: 10% probability

    Anything else is converted to a list and one item is drawn with
    ``self.generator.random.choice``.
    """
    rng = self.generator.random
    if not isinstance(elements, dict):
        return rng.choice(list(elements))

    candidates = list(elements.keys())
    weights = list(elements.values())
    return choice_distribution(candidates, weights, rng)
def ueln(self, country):
    """Build an identifier string of the form ``<country>-<pio>-<number>``.

    :param country: key into ``self.pios`` and ``self.pios_distribution``;
        also used verbatim as the first part of the result.
    :return: the three parts joined with hyphens.
    """
    # choose breed society (PIO), weighted by that country's distribution
    societies = self.pios[country]
    weights = self.pios_distribution[country]
    pio = choice_distribution(societies, weights)

    # create random id
    serial = randint(100000, 999999999)

    return "%s-%s-%s" % (country, pio, serial)
def horse_sex(self):
    '''
    Pick a sex code for a horse.

    Many male horses are gelded/neutered, so the code for sex includes
    this option. Rarely a female horse can also be neutered.
    The full list is:
        00 - Not Known
        10 - Male - entire/neutered not known
        11 - Stallion - entire
        12 - Gelding
        20 - Female - neutered not known
        21 - Mare
        22 - Neutered female
        30 - Hermaphrodite

    :return: one key of ``self.SEX``, drawn by ``choice_distribution``
        with ``self.SEX_PROPORTIONS`` as the weights
    '''
    # Codes and proportions are passed as two parallel sequences, so this
    # relies on self.SEX yielding its keys in the order SEX_PROPORTIONS
    # was written - presumably true, verify against the class definition.
    codes = [code for code in self.SEX.keys()]
    proportions = self.SEX_PROPORTIONS
    return choice_distribution(codes, proportions)
def random_element(cls, elements=('a', 'b', 'b')):
    """
    Pick one element at random from the passed object.

    When `elements` is a dictionary, its keys are the candidates and its
    values are used as the weights. For example::

        random_element({"{{variable_1}}": 0.5, "{{variable_2}}": 0.2,
                        "{{variable_3}}": 0.2, "{{variable_4}}": 0.1})

    will have the following distribution:
        * `variable_1`: 50% probability
        * `variable_2`: 20% probability
        * `variable_3`: 20% probability
        * `variable_4`: 10% probability

    Anything else is converted to a list and one item is drawn with
    ``random.choice``.
    """
    # NOTE(review): the default repeats 'b' (no 'c'); possibly a typo, but
    # it is part of the signature and is kept as-is here.
    if not isinstance(elements, dict):
        return random.choice(list(elements))

    candidates, weights = list(elements.keys()), list(elements.values())
    return choice_distribution(candidates, weights)
def GetFakerDataAsPercent(arr, arrp):
    '''Return one element of ``arr`` drawn by ``choice_distribution``.

    Example inputs::

        arr = ('a', 'b', 'c', 'd')
        arrp = (0.5, 0.2, 0.2, 0.1)

    :param arr: candidate values
    :param arrp: weight of each candidate, in the same order as ``arr``
    :return: the element selected by ``choice_distribution(arr, arrp)``
    '''
    # Previously the drawn value was assigned and then dropped by a bare
    # ``return``, so callers always received None.
    return choice_distribution(arr, arrp)
def first_name_male(cls):
    """Return an entry of ``cls.first_names_male_US``.

    The entry is drawn by ``choice_distribution`` with
    ``cls.first_names_male_freq_US`` as the weights.
    """
    names = cls.first_names_male_US
    frequencies = cls.first_names_male_freq_US
    return choice_distribution(names, frequencies)
def last_name(cls):
    """Return an entry of ``cls.last_names_US``.

    The entry is drawn by ``choice_distribution`` with
    ``cls.last_name_freq_US`` as the weights.
    """
    surnames = cls.last_names_US
    frequencies = cls.last_name_freq_US
    return choice_distribution(surnames, frequencies)
def country_of_birth(self):
    """Return an entry of ``self.population``.

    The entry is drawn by ``choice_distribution`` with
    ``self.population_distribution`` as the weights.
    """
    candidates = self.population
    weights = self.population_distribution
    return choice_distribution(candidates, weights)
def horse_color(self):
    """Return an entry of ``self.COLORS``.

    The entry is drawn by ``choice_distribution`` with
    ``self.COLOURS_PROPORTIONS`` as the weights.
    """
    # https://en.wikipedia.org/wiki/Equine_coat_color
    # http://www.animalgenetics.us/Equine/CCalculator1.asp
    # NOTE(review): the two attributes are spelled differently (COLORS vs
    # COLOURS_PROPORTIONS) - confirm both exist on the class.
    colours = self.COLORS
    proportions = self.COLOURS_PROPORTIONS
    return choice_distribution(colours, proportions)
def city(self):
    """
    Return an entry of ``self.population``, drawn by
    ``choice_distribution`` with ``self.population_distribution`` as the
    weights.

    :example 'Cork'
    """
    places = self.population
    weights = self.population_distribution
    return choice_distribution(places, weights)