def __iter__(self):
    """Yield one id collection per entry of ``self.set_indices``.

    A "large" collection is drawn once from ``self.universe_size`` with
    ``choice_fast``, and a "small" collection is then drawn from the large
    one. A lookup table is built in which the first ``self.num_large_sets``
    positions refer to the large collection and the following
    ``self.num_small_sets`` positions refer to the small one; every index
    in ``self.set_indices`` selects an entry of that table.

    Side effect: the ids of the large collection are added to
    ``self.union_ids``.

    Yields:
        The large or the small id collection. The same two objects are
        yielded repeatedly; they are not copied per iteration.
    """
    rng = self.random_state
    big = choice_fast(self.universe_size, self.large_set_size, rng)
    little = choice_fast(big, self.small_set_size, rng)
    self.union_ids.update(set(big))

    # Large entries first, small entries after them.
    lookup = [big] * self.num_large_sets
    lookup += [little] * self.num_small_sets

    for position in self.set_indices:
        yield lookup[position]
    # Inside a generator this value only ends up on StopIteration; a plain
    # ``for`` loop over the generator never sees it.
    return self
def __iter__(self):
    """Yield one freshly drawn id collection per entry of ``self.set_sizes``.

    Each collection is produced by ``choice_fast`` from
    ``self.universe_size`` with the requested size and
    ``self.random_state``.

    Side effect: after every draw ``self.union_ids`` is replaced by its
    union with the ids just drawn.

    Yields:
        The ids returned by ``choice_fast`` for the current size.
    """
    for wanted in self.set_sizes:
        drawn = choice_fast(self.universe_size, wanted, self.random_state)
        # ``union`` builds a new object, so the attribute is rebound here
        # rather than mutated in place.
        self.union_ids = self.union_ids.union(drawn)
        yield drawn
    # Inside a generator this value only ends up on StopIteration; a plain
    # ``for`` loop over the generator never sees it.
    return self
def _select_ids(lb, ub, size):
    """Pick ids from the half-open integer range ``[int(lb), int(ub))``.

    Args:
        lb: Lower bound of the range (inclusive); truncated with ``int``.
        ub: Upper bound of the range (exclusive); truncated with ``int``.
        size: Number of ids requested.

    Returns:
        Every id of the range (as ``np.arange`` output) when ``size`` is at
        least the length of the range; otherwise whatever ``choice_fast``
        returns for the candidate ids, ``size`` and ``self.random_state``.

    Note:
        ``self`` is not a parameter; it is taken from the enclosing scope.
    """
    start, stop = int(lb), int(ub)
    pool = np.arange(start, stop)
    # Request covers the whole range: nothing to sample, return all of it.
    if stop - start <= size:
        return pool
    return choice_fast(pool, size, self.random_state)
def test_choice_fast_choose_elements_from_list(self):
    """Elements chosen from an explicit array must all come from that array.

    For every array length ``i`` in ``[50, 500)`` a random integer array is
    generated, a random number of elements is chosen from it with
    ``random.choice_fast``, and each chosen element is checked for
    membership in the source array.
    """
    for i in range(50, 500):
        # Random array of i integers drawn from [0, 5000).
        elements = np.random.randint(0, 5000, i)
        # Choose between 1 and i - 1 elements from that array.
        chosen = random.choice_fast(elements, np.random.randint(1, i))
        # assertIn reports the offending element and the container on
        # failure, whereas assertTrue(x in y) only says "False is not true".
        for element in chosen:
            self.assertIn(element, elements)
def __iter__(self):
    """Yield one shuffled multiset of ids per (set size, pmf) pair.

    ``self.set_sizes`` and ``self.pmf_list`` are walked in lockstep. For
    each pair, distinct ids are drawn with ``choice_fast`` and every id is
    given a frequency sampled through ``self.random_state.choice`` using the
    pmf as probabilities. Each id is then repeated according to its
    frequency and the resulting list is shuffled in place.

    Yields:
        A list in which every drawn id appears as many times as its sampled
        frequency, in shuffled order.
    """
    for wanted, weights in zip(self.set_sizes, self.pmf_list):
        distinct_ids = choice_fast(
            self.universe_size, wanted, self.random_state)
        # The sampled values are positions into the pmf; the +1 shifts them
        # so the i-th pmf entry corresponds to frequency i + 1.
        repeats = self.random_state.choice(
            len(weights), size=wanted, p=weights) + 1

        expanded = []
        for member, count in zip(distinct_ids, repeats):
            expanded.extend([member] * count)

        self.random_state.shuffle(expanded)
        yield expanded
    # Inside a generator this value only ends up on StopIteration; a plain
    # ``for`` loop over the generator never sees it.
    return self
def __iter__(self):
    """Yield id arrays that partly overlap with the ids produced so far.

    For the i-th entry of ``self.set_size_list``:

    * ``self.overlap_size_list[i]`` ids are drawn from ``self.union_ids``
      with ``choice_fast`` (the overlapping part);
    * the remaining ``set_size - overlap_size`` ids are sliced off the
      front of ``self.ids_pool`` (the non-overlapping part).

    Side effects: the slice taken from ``self.ids_pool`` is removed from
    it, and the same slice is appended to ``self.union_ids``.

    Yields:
        The concatenation of the overlapping and non-overlapping ids.
    """
    for position, wanted in enumerate(self.set_size_list):
        shared_count = self.overlap_size_list[position]
        shared = choice_fast(self.union_ids, shared_count, self.random_state)

        fresh_count = wanted - shared_count
        fresh = self.ids_pool[:fresh_count]
        # Drop exactly what was taken; the slice may be shorter than
        # requested when the pool is running out.
        self.ids_pool = self.ids_pool[len(fresh):]
        self.union_ids = np.concatenate([self.union_ids, fresh])

        yield np.concatenate([shared, fresh])
    # Inside a generator this value only ends up on StopIteration; a plain
    # ``for`` loop over the generator never sees it.
    return self
def __iter__(self):
    """Yield one shuffled multiset of ids per (set size, gamma params) pair.

    ``self.set_sizes`` and ``self.gamma_params`` are walked in lockstep.
    For each pair, distinct ids are drawn with ``choice_fast``. A rate is
    sampled per id from ``self.random_state.gamma`` (element 0 of the
    params is passed as ``shape``, element 1 as ``scale``), and the per-id
    frequency is a ``self.random_state.poisson`` draw with that rate, plus
    one. When ``self.freq_cap`` is truthy the frequencies are capped at it.
    Each id is repeated according to its frequency and the list is shuffled
    in place.

    Yields:
        A list in which every drawn id appears as many times as its sampled
        frequency, in shuffled order.
    """
    for wanted, params in zip(self.set_sizes, self.gamma_params):
        distinct_ids = choice_fast(
            self.universe_size, wanted, self.random_state)
        rates = self.random_state.gamma(
            shape=params[0], scale=params[1], size=wanted)
        # The +1 shifts every Poisson draw up by one.
        counts = self.random_state.poisson(lam=rates, size=wanted) + 1
        if self.freq_cap:
            counts = np.minimum(counts, self.freq_cap)

        expanded = []
        for member, count in zip(distinct_ids, counts):
            expanded.extend([member] * count)

        self.random_state.shuffle(expanded)
        yield expanded
    # Inside a generator this value only ends up on StopIteration; a plain
    # ``for`` loop over the generator never sees it.
    return self
def test_choice_fast_is_unique(self):
    """``random.choice_fast(500, i)`` must not return repeated elements.

    Checked for every requested size ``i`` in ``[50, 500)`` by comparing
    the number of returned elements with the number of distinct ones.
    """
    for i in range(50, 500):
        chosen = random.choice_fast(500, i)
        no_repeats = set(chosen)
        # assertEqual prints both lengths on failure, whereas
        # assertTrue(a == b) only says "False is not true".
        self.assertEqual(len(chosen), len(no_repeats))
def test_choice_fast_len_is_m(self):
    """``random.choice_fast(10000, m)`` must return exactly ``m`` elements.

    Checked for every ``m`` in ``[0, 1000)``.
    """
    population = 10000
    for requested in range(1000):
        sample = random.choice_fast(population, requested)
        self.assertLen(sample, requested)
def test_choice_fast_same_random_state_same_output(self):
    """Two identically seeded random states must give the same elements.

    Two separate ``np.random.RandomState`` objects are created with the
    same seed and each is passed to its own ``random.choice_fast`` call;
    the two results are compared with ``assertSameElements``.
    """
    first_state = np.random.RandomState(1)
    second_state = np.random.RandomState(1)
    first = random.choice_fast(10000, 5000, first_state)
    second = random.choice_fast(10000, 5000, second_state)
    self.assertSameElements(first, second)
def __iter__(self):
    """Yield the same ids ``self.num_sets`` times.

    One draw is made with ``choice_fast`` from ``self.universe_size`` and
    added to ``self.union_ids``. Every iteration then yields a new list
    built from the current contents of ``self.union_ids``.

    Yields:
        A fresh ``list`` copy of ``self.union_ids`` on each iteration.
    """
    drawn = choice_fast(self.universe_size, self.set_size, self.random_state)
    self.union_ids.update(drawn)
    for _ in range(self.num_sets):
        # A new list per yield, so consumers never share one list object.
        yield list(self.union_ids)
    # Inside a generator this value only ends up on StopIteration; a plain
    # ``for`` loop over the generator never sees it.
    return self