def test_num_unique_combinations_greater_than_max_size(self) -> None:
    # Samples from a 50-slot pattern with 50 choices per slot and checks that,
    # both with and without replacement, exactly `max_sample_size` results
    # come back.
    max_sample_size = 2
    choices = tuple(range(50))
    pattern = tuple([choices] * 50)
    option_variants = [
        ComboOptions(max_sample_size=max_sample_size, with_replacement=replace)
        for replace in (True, False)
    ]
    for combo_options in option_variants:
        sampled = list(join_combo(pattern, combo_options=combo_options))
        self.assertEqual(len(sampled), max_sample_size)
def test_unique_one_d_to_mult_d(self) -> None:
    # Samples without replacement from three groups of ten integers
    # (0-9, 10-19, 20-29) and checks that no result appears twice.
    pattern = tuple(tuple(range(start, start + 10)) for start in (0, 10, 20))
    num_unique_combinations = 1000
    # Subtract 1 from num_unique_combinations because if
    # max_sample_size == num_unique_combinations, the sampling will fall back
    # to joining without sampling. This leads to a 0.1% chance that there is
    # an error, given that num_unique_combinations is 1000.
    sample_size = num_unique_combinations - 1
    combo_options = ComboOptions(max_sample_size=sample_size,
                                 with_replacement=False)
    sampled = list(join_combo(pattern, combo_options=combo_options))
    distinct = set(sampled)
    self.assertEqual(len(distinct), len(sampled))
def _combine() -> Iterable[Tuple[str, Sequence[str], Sequence[str]]]:
    """Yield an ``(utterance, handled_tokens, handled_groups)`` triple per combination.

    Iterates over ``join_combo(utterance_combo, combo_options=combo_options)``.
    For each set of components it computes the handled tokens, then the
    handled groups from those tokens, and yields them together with the
    components joined by single spaces.

    ``utterance_combo``, ``combo_options``, ``tokens``, ``token_handler_map``,
    ``groups`` and ``group_handler_map`` are not parameters; they are resolved
    from the surrounding scope.
    """
    for components in join_combo(utterance_combo, combo_options=combo_options):
        token_results = _compute_handled_tokens(
            components, tokens, token_handler_map=token_handler_map)
        group_results = _compute_handled_groups(
            groups, token_results, group_handler_map)
        utterance = ' '.join(components)
        yield utterance, token_results, group_results
def _test_join_combo(self,
                     pattern: Sequence[Sequence],
                     expected_output: Iterable[Sequence],
                     *,
                     all_options: Optional[List[ComboOptions]] = None
                     ) -> None:
    """Run ``join_combo`` on ``pattern`` under each option set and check the results.

    Args:
        pattern: The pattern passed to ``join_combo``.
        expected_output: Every result that ``join_combo`` is allowed to
            produce; it is materialised into a tuple once up front.
        all_options: Option sets to exercise. When omitted (or empty), two
            sets are used, without and with replacement, each with
            ``max_sample_size`` equal to the number of expected results.

    For each option set:
        * with replacement, the output length must equal ``max_sample_size``;
        * without replacement, the output must contain no duplicates and its
          length must be the smaller of the expected count and
          ``max_sample_size``;
        * in both cases every produced item must be one of the expected ones.
    """
    allowed = tuple(expected_output)
    if not all_options:
        all_options = [
            ComboOptions(max_sample_size=len(allowed), with_replacement=replace)
            for replace in (False, True)
        ]
    for combo_options in all_options:
        produced = list(join_combo(pattern, combo_options=combo_options))
        if not combo_options.with_replacement:
            self.assertEqual(len(produced), len(set(produced)))
            size_cap = min(len(allowed), combo_options.max_sample_size)
            self.assertEqual(len(produced), size_cap)
        else:
            self.assertEqual(len(produced), combo_options.max_sample_size)
        for item in produced:
            self.assertIn(item, allowed)
def _expand_token_pattern(token_pattern: Tuple[Tuple[str, ...], ...]) -> Sequence[str]:
    """Return the space-joined phrases produced by ``join_combo(token_pattern)``.

    Each item yielded by ``join_combo`` is joined with single spaces, and the
    resulting strings are collected into a tuple in iteration order.
    """
    return tuple(map(' '.join, join_combo(token_pattern)))