def generate_random(self, generated_randgens=None):
    """
    Generates these words once, honouring the random-generation modifier.

    `generated_randgens` maps randgen names to the decision already taken
    for them (`True`: must be generated, `False`: cannot be generated).
    It is updated in place when a decision is drawn here for a named
    randgen. A fresh mapping is used when it is omitted.

    Returns an `Example` holding the words (with case and leading space
    adjusted according to `self.casegen` and `self.leading_space`), or an
    empty `Example` when the randgen decides nothing is generated.
    """
    if generated_randgens is None:
        generated_randgens = dict()

    # Manage randgen
    if self.randgen is not None:
        if self.randgen in generated_randgens:
            # A decision was already taken for this name: follow it
            if not generated_randgens[self.randgen]:
                return Example()  # Cannot be generated
        else:
            skipped = randint(0, 99) >= self.percentgen
            # An empty name is assumed to denote an unnamed randgen
            # (NOTE(review): confirm against the parser). Recording it
            # would tie together every unnamed randgen sharing this
            # mapping, so only real names are memorized.
            if self.randgen != "":
                generated_randgens[self.randgen] = not skipped
            if skipped:
                return Example()

    # Generate the string according to the parameters of the object
    generated_str = self.words
    if self.casegen:
        generated_str = randomly_change_case(generated_str)
    if self.leading_space and may_get_leading_space(generated_str):
        generated_str = ' ' + generated_str
    return Example(generated_str)
def _generate_random_strategy(self):
    """
    Builds one example by appending, in order, a random generation of each
    item of `self._contents`.

    A single randgen mapping is created here and handed to every content
    through the `randgen_mapping` keyword argument.
    """
    shared_mapping = {}
    result = Example()
    for item in self._contents:
        generated_part = item.generate_random(randgen_mapping=shared_mapping)
        result.append(generated_part)
    return result
def test_mapping_compatible(self):
    """
    Examples whose randgen mappings agree on every name they share can be
    concatenated, whichever example comes first.
    """
    compatible_pairs = [
        ({"name": True}, {"name": True}),
        ({"name": False}, {"name": False}),
        ({"other": True}, {"name": True}),
        ({"other": False}, {"name": True}),
        ({"name": False}, {"name": False, "name2": True}),
        ({"name": False, "name2": True}, {"name": False, "name2": True}),
    ]
    left = Example("test1")
    right = Example("test2")
    # The same two examples are reused; only their mappings are swapped
    for left_mapping, right_mapping in compatible_pairs:
        setattr(left, RANDGEN_MAPPING_KEY, left_mapping)
        setattr(right, RANDGEN_MAPPING_KEY, right_mapping)
        assert can_concat_examples(left, right)
        assert can_concat_examples(right, left)
def generate_random(self, generated_randgens=None):
    """
    Generates one example from the alias definition this content refers to.

    `generated_randgens` maps randgen names to the decision already taken
    for them; it is updated in place when a decision is drawn here for a
    randgen whose name is not the empty string. A fresh mapping is used
    when it is omitted.

    Returns the example produced by the alias definition (case and leading
    space adjusted as configured), or an empty `Example` when the randgen
    decides nothing is generated.
    """
    if generated_randgens is None:
        generated_randgens = dict()

    self.check_casegen()

    # Manage randgen
    if self.randgen is not None:
        if self.randgen in generated_randgens:
            # Follow the decision previously taken for this name
            if not generated_randgens[self.randgen]:
                return Example()
        else:
            skipped = randint(0, 99) >= self.percentgen
            # Decisions are only memorized for non-empty names
            if self.randgen != "":
                generated_randgens[self.randgen] = not skipped
            if skipped:
                return Example()

    definition = self.parser.get_definition(self.name, UnitType.alias)
    example = definition.generate_random(self.variation_name, self.arg_value)

    if self.casegen:
        example.text = randomly_change_case(example.text)
    if self.leading_space and may_get_leading_space(example.text):
        example.text = ' ' + example.text
    return example
def generate_all(self):
    """
    Lists every example the referenced alias definition can generate.

    When a randgen is set, an empty example is listed first. A leading
    space is then added (in place) to the examples that may get one, and,
    with casegen, each example whose leading case may change is replaced
    by a lowercase-leading and an uppercase-leading copy.
    """
    self.check_casegen()

    all_examples = [Example()] if self.randgen is not None else []
    definition = self.parser.get_definition(self.name, UnitType.alias)
    all_examples.extend(
        definition.generate_all(self.variation_name, self.arg_value))

    if self.leading_space:
        for example in all_examples:
            if may_get_leading_space(example.text):
                example.text = ' ' + example.text

    if not self.casegen:
        return all_examples

    case_variants = []
    for example in all_examples:
        if not may_change_leading_case(example.text):
            # Nothing to vary: keep the example itself
            case_variants.append(example)
            continue
        case_variants.append(
            Example(with_leading_lower(example.text), example.entities))
        case_variants.append(
            Example(with_leading_upper(example.text), example.entities))
    return case_variants
def test_no_mapping(self):
    """
    Examples can be concatenated when neither, or only one of them,
    carries a randgen mapping.
    """
    with_text = Example("test1")
    blank = Example()
    # Neither example has a mapping yet
    assert can_concat_examples(with_text, blank)

    # Only one of the two examples has a mapping
    setattr(with_text, RANDGEN_MAPPING_KEY, {"randgen name": True})
    assert can_concat_examples(with_text, blank)
    assert can_concat_examples(blank, with_text)
def test_empty_mappings(self):
    """Merging the mappings of two examples that have none yields `None`."""
    # Two examples without text
    first, second = Example(), Example()
    assert merge_randgen_mappings(first, second) is None

    # Two examples with text
    first, second = Example("test1"), Example("test2")
    assert merge_randgen_mappings(first, second) is None
def test_other(self):
    """
    `with_leading_lower` leaves the text untouched when its first letter
    is already lowercase, even behind leading whitespace.
    """
    lowered = with_leading_lower(Example("test"))
    assert lowered.text == "test"

    lowered = with_leading_lower(Example(" \talinea"))
    assert lowered.text == " \talinea"
def test_single(self):
    """
    Concatenating an example that has a mapping with an empty example
    keeps both the text and the mapping of the former.
    """
    expected_mapping = {"name": True}
    with_mapping = Example("test1")
    setattr(with_mapping, RANDGEN_MAPPING_KEY, expected_mapping)
    blank = Example()

    result = concat_examples_with_randgen(with_mapping, blank)

    assert result.text == with_mapping.text
    assert getattr(result, RANDGEN_MAPPING_KEY, None) == expected_mapping
def test_no_mapping(self):
    """`modify_example` with an empty mapping returns an equal example."""
    originals = [Example(), Example("test"), Example("test with $ARG")]
    for original in originals:
        # Work on a clone so the original is available for comparison
        assert modify_example(clone(original), dict()) == original
def test_modify_example(self):
    """
    Repeatedly modifying an example only ever yields its text with a
    lowercase or an uppercase first letter.
    """
    attempts = 5

    plain = Example("text")
    accepted = ("text", "Text")
    for _ in range(attempts):
        modify_example(plain)
        assert plain.text in accepted

    indented = Example(" \talinea")
    accepted = (" \talinea", " \tAlinea")
    for _ in range(attempts):
        modify_example(indented)
        assert indented.text in accepted
def test_no_replacement(self):
    """
    `modify_example` returns an equal example when the text contains none
    of the arguments named in the mapping.
    """
    unused_mapping = {"argument": "replaced"}
    originals = [Example(), Example("test"), Example("test with $ARG")]
    for original in originals:
        # Work on a clone so the original is available for comparison
        assert modify_example(clone(original), unused_mapping) == original
def generate_random(self, variation_name=None, arg_value=None):
    """
    Picks one of the rules at random and generates an example from it.

    The rule is chosen among `self.rules`, or among the rules of the
    variation `variation_name` when one is given. The generated text is
    prefixed with `ENTITY_MARKER`, and an entity (a dict with the keys
    "slot-name", "text" and "value") is appended to the entities of the
    example.

    Returns an `Example`, which is empty when no rule could be chosen.
    Raises `SyntaxError` for an unknown variation name and `ValueError`
    when the chosen rule is empty.
    """
    # Memorize arg value
    if arg_value is not None \
            and arg_value not in self.arg_values_encountered:
        self.arg_values_encountered.append(arg_value)

    if variation_name is None:
        candidate_rules = self.rules
    elif variation_name in self.variations:
        candidate_rules = self.variations[variation_name]
    else:
        raise SyntaxError("Couldn't find a variation named '" +
                          variation_name + "' for " + self.type + " '" +
                          self.name + "'")
    rule = choose(candidate_rules)

    if rule is None:  # No rule
        return Example()
    if len(rule) == 0:
        raise ValueError("Tried to generate an entity using an empty rule " +
                         "for slot named '" + self.name + "'")

    result = Example()
    for token in rule:
        piece = token.generate_random()
        result.text += piece.text
        result.entities.extend(piece.entities)

    if self.modifiers.casegen and self.can_have_casegen():
        result.text = randomly_change_case(result.text)

    # Replace `arg` inside the generated sentence (stripped for safety)
    result.text = \
        ENTITY_MARKER + self._replace_arg(result.text, arg_value).strip()
    entity_text = result.text[len(ENTITY_MARKER):]

    # The value is the name of the first content of the rule, unless that
    # content is not a dummy slot value: then the generated text is used.
    first_content = rule[0]
    slot_value = first_content.name
    if not isinstance(first_content, DummySlotValRuleContent):
        # Replace the argument by its value if needed
        slot_value = self._replace_arg(entity_text, arg_value)

    # Add the entity in the list
    result.entities.append({
        "slot-name": self.name,
        "text": entity_text,
        "value": slot_value,
    })
    return result
def test_merge(self):
    """
    Merging two mappings that agree on their shared name yields the union
    of both mappings.
    """
    first = Example("test1")
    setattr(first, RANDGEN_MAPPING_KEY, {"name": True, "other": False})
    second = Example("test2")
    setattr(second, RANDGEN_MAPPING_KEY, {"other": False, "third": True})

    merged = merge_randgen_mappings(first, second)

    assert merged == {"name": True, "other": False, "third": True}
def test_concat(self):
    """
    Concatenating two examples with disjoint mappings joins their texts
    and combines their mappings.
    """
    first = Example("test1")
    second = Example("test2")
    setattr(first, RANDGEN_MAPPING_KEY, {"name": True})
    setattr(second, RANDGEN_MAPPING_KEY, {"other": False})

    result = concat_examples_with_randgen(first, second)

    assert result.text == "test1test2"
    combined_mapping = getattr(result, RANDGEN_MAPPING_KEY, None)
    assert combined_mapping == {"name": True, "other": False}
def test_no_merge(self):
    """
    When only one of the two examples has a mapping, merging yields a
    mapping equal to that one, whichever side carries it.
    """
    only_mapping = {"name": True}

    # Mapping on the first example only
    first = Example("test1")
    second = Example("test2")
    setattr(first, RANDGEN_MAPPING_KEY, only_mapping)
    assert merge_randgen_mappings(first, second) == only_mapping

    # Mapping on the second example only (the first one is recreated)
    first = Example("test1")
    setattr(second, RANDGEN_MAPPING_KEY, only_mapping)
    assert merge_randgen_mappings(first, second) == only_mapping
def test_replacement(self):
    """
    `modify_example` replaces each `$name` found in the text with the
    value the mapping associates to `name`, modifying the example in place.
    """
    replacements = {"test": "TEST", "replace": "argument"}
    cases = [
        ("replace $test by uppercase", "replace TEST by uppercase"),
        ("is this $replace?", "is this argument?"),
        ("The $replace is $test", "The argument is TEST"),
    ]
    for template, expected in cases:
        example = Example(template)
        modify_example(example, replacements)
        assert example.text == expected
def _make_empty_example(self):
    """
    Creates an example that has neither text nor entities.

    This is a dedicated method so that intent definitions and other items
    can return a different type of example instead.
    """
    empty_example = Example()
    return empty_example
def test_make_all_possibilities(self):
    """
    Checks that `make_all_possibilities` keeps the empty example and the
    given examples, and that it flags them in the randgen mapping when a
    randgen name is provided (flags inverted when the last argument is
    `True`).
    """
    def check_flags(generated, empty_example, flag_of_empty):
        # The empty example must carry `flag_of_empty`, all others the
        # opposite truth value.
        for ex in generated:
            mapping = getattr(ex, RANDGEN_MAPPING_KEY, dict())
            expected = flag_of_empty if ex == empty_example \
                else not flag_of_empty
            assert bool(mapping["randgen"]) == expected

    # Without randgen name
    empty = Example()
    examples = [Example("test1"), Example("test2")]
    all_examples = make_all_possibilities(examples, empty)
    assert empty in all_examples
    assert all(ex in all_examples for ex in examples)

    # With a randgen name (the same examples are reused on purpose)
    empty = Example()
    all_examples = make_all_possibilities(examples, empty, "randgen")
    check_flags(all_examples, empty, False)

    # With a randgen name and the last argument set
    empty = Example()
    examples = [Example("test1"), Example("test2")]
    all_examples = make_all_possibilities(examples, empty, "randgen", True)
    check_flags(all_examples, empty, True)
def test_errors(self):
    """
    `make_all_possibilities` raises `KeyError` when one of the examples,
    or the empty example, already has an entry for the randgen name.
    """
    randgen_name = "name"
    empty = Example()
    examples = [Example("test1"), Example("test2")]

    # First example already mapped
    setattr(examples[0], RANDGEN_MAPPING_KEY, {"name": False})
    with pytest.raises(KeyError):
        make_all_possibilities(examples, empty, randgen_name)

    # Second example already mapped (the first one is recreated)
    examples[0] = Example("test1")
    setattr(examples[1], RANDGEN_MAPPING_KEY, {"name": True})
    with pytest.raises(KeyError):
        make_all_possibilities(examples, empty, randgen_name)

    # Empty example already mapped
    setattr(empty, RANDGEN_MAPPING_KEY, {"name": True})
    with pytest.raises(KeyError):
        make_all_possibilities([], empty, randgen_name)
def generate_random(self, generated_randgens=None):
    """
    Generates one example from one of the choices, picked at random.

    `generated_randgens` is handed to each token of the picked choice so
    that they share their randgen decisions; a fresh mapping is used when
    it is omitted.

    Returns the concatenation of what the tokens of the picked choice
    generate (case and leading space adjusted as configured), or an empty
    `Example` when there is no choice or when the randgen decides nothing
    is generated.
    """
    # Local import so that this method does not depend on the imports of
    # the file.
    from random import randint

    if generated_randgens is None:
        generated_randgens = dict()

    self.check_casegen()

    # Manage randgen: returning an empty example unconditionally would
    # make a choice with randgen never generate anything. No percentage
    # is available in this method, so an even chance is used.
    if self.randgen and randint(0, 99) >= 50:
        return Example()

    if len(self.choices) <= 0:
        return Example()

    choice = choose(self.choices)
    generated_example = Example()
    for token in choice:
        generated_token = token.generate_random(generated_randgens)
        generated_example.text += generated_token.text
        generated_example.entities.extend(generated_token.entities)

    if self.casegen:
        generated_example.text = randomly_change_case(
            generated_example.text)
    if self.leading_space and may_get_leading_space(
            generated_example.text):
        generated_example.text = ' ' + generated_example.text
    return generated_example
def generate_all(self):
    """
    Lists every example that can be generated from the choices.

    For each choice, the generations of its tokens are combined (texts
    concatenated, entities joined) in every possible way. When a randgen
    is set, an empty example is listed first. A leading space is then
    added (in place) to the examples that may get one, and, with casegen,
    each example is replaced by its lowercase-leading and
    uppercase-leading variants.
    """
    self.check_casegen()

    generated_examples = []
    if self.randgen:
        generated_examples.append(Example())

    for choice in self.choices:
        current_examples = []
        for token in choice:
            current_token_all_generations = token.generate_all()
            if len(current_examples) <= 0:
                current_examples = list(current_token_all_generations)
            else:
                current_examples = [
                    Example(partial_example.text + gen.text,
                            partial_example.entities + gen.entities)
                    for partial_example in current_examples
                    for gen in current_token_all_generations
                ]
        generated_examples.extend(current_examples)

    if self.leading_space:
        for ex in generated_examples:
            if may_get_leading_space(ex.text):
                ex.text = ' ' + ex.text

    if self.casegen:
        case_variants = []
        for ex in generated_examples:
            lowered = with_leading_lower(ex.text)
            uppered = with_leading_upper(ex.text)
            case_variants.append(Example(lowered, ex.entities))
            # Texts whose leading case cannot change would otherwise be
            # listed twice.
            if uppered != lowered:
                case_variants.append(Example(uppered, ex.entities))
        # The variants must replace the list that is returned: without
        # this assignment, casegen has no effect at all.
        generated_examples = case_variants

    return generated_examples
def generate_all(self):
    """
    Lists every example these words can generate.

    The list starts with an empty text when a randgen is set, followed by
    the words (both their lowercase-leading and uppercase-leading forms
    with casegen). A leading space is then added to the texts that may get
    one, and each text is wrapped in an `Example`.
    """
    texts = []
    if self.randgen is not None:
        texts.append("")

    if self.casegen:
        texts.extend(
            (with_leading_lower(self.words), with_leading_upper(self.words)))
    else:
        texts.append(self.words)

    if self.leading_space:
        texts = [
            ' ' + text if may_get_leading_space(text) else text
            for text in texts
        ]

    return [Example(text) for text in texts]
def test_empty(self):
    """Concatenating two empty examples gives an empty example."""
    first, second = Example(), Example()
    result = concat_examples_with_randgen(first, second)
    assert result == Example()
def generate_all(self):
    """
    Returns a list with the single example this word can generate: the
    word itself, preceded by a space when `self.leading_space` is set.
    """
    text = ' ' + self.word if self.leading_space else self.word
    return [Example(text)]
def generate_random(self, arg_value=None):
    """
    Returns an example made of the word, preceded by a space when
    `self.leading_space` is set. `arg_value` is not used here.
    """
    prefix = ' ' if self.leading_space else ''
    return Example(prefix + self.word)
def generate_all(self):
    """Returns a list whose only element is an empty example."""
    only_possibility = Example()
    return [only_possibility]
def generate_random(self, generated_randgens=None):
    """
    Returns an empty example. `generated_randgens` is accepted but not
    used here.
    """
    nothing_generated = Example()
    return nothing_generated
def test_empty(self):
    """`with_leading_lower` on an empty example gives an empty text."""
    lowered = with_leading_lower(Example())
    assert lowered.text == ""
def test_make_all_pssibilities(self):
    """
    Every example produced by `make_all_possibilities` is one of the given
    texts with a lowercase or an uppercase first letter.
    """
    accepted_texts = ("test", "Test", " alinea", " Alinea")
    sources = [Example("test"), Example(" alinea")]
    produced = make_all_possibilities(sources)
    for example in produced:
        assert example.text in accepted_texts