Exemplo n.º 1
0
    def test_it_should_process_entities_with_variants(self):
        result = parse("""
@[city]
  paris
  rouen
  ~[new york]

@[city#variant]
  one variant
  another one

~[new york]
  nyc
  the big apple
""")

        snips_data = snips(result)

        expect(snips_data['entities']).to.have.length_of(1)
        expect(snips_data['entities']).to.have.key('city')

        entity = snips_data['entities']['city']

        expect(entity['data']).to.have.length_of(5)
        expect(entity['use_synonyms']).to.be.true
        expect(entity['automatically_extensible']).to.be.true
        expect(entity['data'][2]['synonyms']).to.have.length_of(2)
        expect(entity['data'][2]['synonyms']).to.equal(
            ['nyc', 'the big apple'])

        data = [d.get('value') for d in entity['data']]

        expect(data).to.equal(
            ['paris', 'rouen', 'new york', 'one variant', 'another one'])
Exemplo n.º 2
0
  def fit_from_skill_data(self, skills=None):
    """Fit the interpreter with every training data registered in the system.

    The training DSL of each module is parsed and merged (modules are visited
    sorted by their key), the merged result is converted by the post-processor
    named after this interpreter (`self.name`) and finally handed to `fit`.

    A module whose DSL cannot be parsed is logged and skipped. When no
    post-processor matches `self.name`, a critical message is logged and the
    interpreter is not fitted.

    Args:
      skills (list of str): Optional list of skill names from which we should retrieve training data.

    """

    filtered_module_trainings = get_training_data(self.lang)

    if skills:
      filtered_module_trainings = { k: v for (k, v) in filtered_module_trainings.items() if k in skills }

    self._logger.info('Merging skill training data from "%d" modules', len(filtered_module_trainings))

    data = {}
    sorted_trainings = sorted(filtered_module_trainings.items(),
      key=lambda x: x[0])

    for (module, training_dsl) in sorted_trainings:
      if not training_dsl:
        self._logger.warning('No training data found for "%s"', module)
        continue

      try:
        data = deep_update(data, parse(training_dsl))
      except Exception as e:
        # Best effort: one broken module must not prevent the others from being merged.
        self._logger.error('Could not parse "%s" training data: "%s"', module, e)

    # Only the lookup is guarded: an AttributeError raised *inside* the
    # post-processor is a real bug and must not be reported as a missing one.
    try:
      postprocessor = getattr(postprocessors, self.name)
    except AttributeError:
      self._logger.critical('No post-processors found on pychatl for this interpreter!')
      return

    self.fit(postprocessor(data, language=self.lang))
Exemplo n.º 3
0
    def test_it_should_parse_synonyms(self):
        result = parse("""
~[new york](some=prop, something=else)
  nyc
  the big apple""")

        expect(result['synonyms']).to.have.length_of(1)
        expect(result['synonyms']).to.have.key('new york')

        synonym = result['synonyms']['new york']

        expect(synonym['props']).to.have.length_of(2)
        expect(synonym['props']).to.have.key('some')
        expect(synonym['props']).to.have.key('something')
        expect(synonym['props']['some']).to.equal('prop')
        expect(synonym['props']['something']).to.equal('else')

        expect(synonym['data']).to.have.length_of(2)

        data = synonym['data']

        expect(data[0]['type']).to.equal('text')
        expect(data[0]['value']).to.equal('nyc')
        expect(data[1]['type']).to.equal('text')
        expect(data[1]['value']).to.equal('the big apple')
Exemplo n.º 4
0
    def test_it_should_parse_entities_variants(self):
        result = parse("""
@[city](some=prop, something=else)
  paris
  rouen
  ~[new york]

@[city#variant](var=prop)
  one variant
  another one
""")

        expect(result['entities']).to.have.length_of(1)

        entity = result['entities']['city']

        expect(entity['variants']).to.have.length_of(1)
        expect(entity['variants']).to.have.key('variant')

        expect(entity['props']).to.have.length_of(3)
        expect(entity['props']).to.have.key('some')
        expect(entity['props']).to.have.key('something')
        expect(entity['props']).to.have.key('var')
        expect(entity['props']['some']).to.equal('prop')
        expect(entity['props']['something']).to.equal('else')
        expect(entity['props']['var']).to.equal('prop')

        variant = entity['variants']['variant']

        expect(variant).to.have.length_of(2)

        expect(variant[0]['type']).to.equal('text')
        expect(variant[0]['value']).to.equal('one variant')
        expect(variant[1]['type']).to.equal('text')
        expect(variant[1]['value']).to.equal('another one')
Exemplo n.º 5
0
def main(): # pragma: no cover
  """Command line entry point: parse DSL files, merge them and print JSON.

  Every file given on the command line is parsed, the results are merged in
  the order of the arguments and, when an adapter is given, the merged data
  goes through the post-processor of that name (with the JSON options as
  keyword arguments). The final data is printed as JSON on stdout.
  """
  cli = argparse.ArgumentParser(description='Generates training dataset from a simple DSL.')
  cli.add_argument('--version', action='version', version='%(prog)s v' + __version__)
  cli.add_argument('files', type=str, nargs='+', help='One or more DSL files to process')
  cli.add_argument('-a', '--adapter', type=str, help='Adapter to use when post-processing outputed data')
  cli.add_argument('-o', '--options', type=str, help='Raw options to give to the post processor')
  cli.add_argument('--pretty', action='store_true', help='Pretty output')

  args = cli.parse_args(sys.argv[1:])

  # Every file is read and parsed before anything gets merged.
  parsed_files = []

  for path in args.files:
    with open(path, encoding='utf-8') as stream:
      parsed_files.append(parse(stream.read()))

  merged = {}

  for chunk in parsed_files:
    merged = deep_update(merged, chunk)

  # Options are decoded even when no adapter is requested.
  options = json.loads(args.options) if args.options else {}

  if args.adapter:
    postprocessor = getattr(postprocess, args.adapter)
    merged = postprocessor(merged, **options)

  indent = 2 if args.pretty else None
  print(json.dumps(merged, indent=indent))
Exemplo n.º 6
0
    def test_it_should_parse_entities(self):
        result = parse("""
@[city](some=prop, something=else)
  paris
  rouen
  ~[new york]
""")

        expect(result['entities']).to.have.length_of(1)
        expect(result['entities']).to.have.key('city')

        entity = result['entities']['city']

        expect(entity['props']).to.have.length_of(2)
        expect(entity['props']).to.have.key('some')
        expect(entity['props']).to.have.key('something')
        expect(entity['props']['some']).to.equal('prop')
        expect(entity['props']['something']).to.equal('else')

        data = entity['data']

        expect(data).to.have.length_of(3)

        expect(data[0]['type']).to.equal('text')
        expect(data[0]['value']).to.equal('paris')
        expect(data[1]['type']).to.equal('text')
        expect(data[1]['value']).to.equal('rouen')
        expect(data[2]['type']).to.equal('synonym')
        expect(data[2]['value']).to.equal('new york')
Exemplo n.º 7
0
    def test_it_should_parse_comments(self):
        result = parse("""
# chatl is really easy to understand.
#
# You can defines:
#   - Intents
#   - Entities (with or without variants)
#   - Synonyms
#   - Comments (only at the top level)

# Inside an intent, you got training data.
# Training data can refer to one or more entities and/or synonyms, they will be used
# by generators to generate all possible permutations and training samples.

%[my_intent]
  ~[greet] some training data @[date]
  another training data that uses an @[entity] at @[date#with_variant]

~[greet]
  hi
  hello

# Entities contains available samples and could refer to a synonym.

@[entity]
  some value
  other value
  ~[a synonym]

# Synonyms contains only raw values

~[a synonym]
  possible synonym
  another one

# Entities and intents can define arbitrary properties that will be made available
# to generators.
# For snips, `type` and `extensible` are used for example.

@[date](type=snips/datetime)
  tomorrow
  today

# Variants is used only to generate training sample with specific values that should
# maps to the same entity name, here `date`. Props will be merged with the root entity.

@[date#with_variant]
  the end of the day
  nine o clock
  twenty past five
""")

        expect(result['intents']).to.have.length_of(1)
        expect(result['entities']).to.have.length_of(2)
        expect(result['synonyms']).to.have.length_of(2)
Exemplo n.º 8
0
    def test_it_should_process_entities(self):
        result = parse("""
@[city]
  paris
  rouen
  ~[new york]

@[room](extensible=false, strictness=0.8)
  kitchen
  bedroom

@[date](type=datetime)
  tomorrow
  on tuesday

~[new york]
  nyc
  the big apple
""")

        snips_data = snips(result)

        expect(snips_data['entities']).to.have.length_of(3)
        expect(snips_data['entities']).to.have.key('city')

        entity = snips_data['entities']['city']

        expect(entity['use_synonyms']).to.be.true
        expect(entity['automatically_extensible']).to.be.true
        expect(entity['matching_strictness']).to.equal(1.0)

        expect(entity['data']).to.have.length_of(3)
        expect(entity['data'][0]['value']).to.equal('paris')
        expect(entity['data'][1]['value']).to.equal('rouen')
        expect(entity['data'][2]['value']).to.equal('new york')
        expect(entity['data'][2]['synonyms']).to.have.length_of(2)
        expect(entity['data'][2]['synonyms']).to.equal(
            ['nyc', 'the big apple'])

        expect(snips_data['entities']).to.have.key('room')

        entity = snips_data['entities']['room']

        expect(entity['use_synonyms']).to.be.false
        expect(entity['automatically_extensible']).to.be.false
        expect(entity['matching_strictness']).to.equal(0.8)

        expect(entity['data']).to.have.length_of(2)
        expect(snips_data['entities']).to_not.have.key('date')
        expect(snips_data['entities']).to.have.key('snips/datetime')

        expect(snips_data['entities']['snips/datetime']).to.be.empty
Exemplo n.º 9
0
    def test_process_options(self):
        result = parse("""
@[city]
  paris
  rouen
""")

        snips_data = snips(result)

        expect(snips_data.get('language')).to.equal('en')

        snips_data = snips(result, language='fr')

        expect(snips_data.get('language')).to.equal('fr')
Exemplo n.º 10
0
    def test_it_should_allow_obsolete_declaration_of_type_for_now(self):
        with patch('logging.warning') as mlog:
            result = parse("""
@[date](snips:type=snips/datetime)
  tomorrow
  """)

            snips_data = snips(result)

            expect(snips_data['entities']).to.have.key('snips/datetime')
            expect(snips_data['entities']['snips/datetime']).to.be.empty

            mlog.assert_called_once_with(
                'snips:type has been replaced by type. You should now leave the snips/ prefix away when using it'
            )
Exemplo n.º 11
0
    def test_it_should_parse_complex_properties(self):
        result = parse("""
@[an entity](with complex=property value, and:maybe=an0 ther @)
  a value
""")

        expect(result['entities']).to.have.length_of(1)
        expect(result['entities']).to.have.key('an entity')

        entity = result['entities']['an entity']

        expect(entity['props']).to.have.length_of(2)
        expect(entity['props']).to.have.key('with complex')
        expect(entity['props']).to.have.key('and:maybe')
        expect(entity['props']['with complex']).to.equal('property value')
        expect(entity['props']['and:maybe']).to.equal('an0 ther @')
Exemplo n.º 12
0
    def test_it_should_parse_empty_entities(self):
        result = parse("""
@[room]
  kitchen
  bedroom

@[anotherRoom](type=room)
""")

        expect(result['entities']).to.have.length_of(2)
        expect(result['entities']).to.have.key('room')
        expect(result['entities']).to.have.key('anotherRoom')

        expect(result['entities']['anotherRoom']['props']).to.have.key('type')
        expect(result['entities']['anotherRoom']['props']['type']).to.equal(
            'room')
        expect(result['entities']['anotherRoom']['data']).to.be.empty
Exemplo n.º 13
0
    def test_it_should_parse_intents(self):
        result = parse("""
%[get_forecast](some=prop, something=else)
  will it rain in @[city]
  ~[greet] what's the weather like in @[city#variant]
""")

        expect(result['intents']).to.have.length_of(1)
        expect(result['intents']).to.have.key('get_forecast')

        intent = result['intents']['get_forecast']

        expect(intent['props']).to.have.length_of(2)
        expect(intent['props']).to.have.key('some')
        expect(intent['props']).to.have.key('something')
        expect(intent['props']['some']).to.equal('prop')
        expect(intent['props']['something']).to.equal('else')

        expect(intent['data']).to.have.length_of(2)

        data = intent['data'][0]

        expect(data).to.have.length_of(2)

        expect(data[0]['type']).to.equal('text')
        expect(data[0]['value']).to.equal('will it rain in ')
        expect(data[1]['type']).to.equal('entity')
        expect(data[1]['value']).to.equal('city')
        expect(data[1]['variant']).to.be.none

        data = intent['data'][1]

        expect(data).to.have.length_of(3)

        expect(data[0]['type']).to.equal('synonym')
        expect(data[0]['value']).to.equal('greet')
        expect(data[1]['type']).to.equal('text')
        expect(data[1]['value']).to.equal(" what's the weather like in ")
        expect(data[2]['type']).to.equal('entity')
        expect(data[2]['value']).to.equal('city')
        expect(data[2]['variant']).to.equal('variant')
Exemplo n.º 14
0
def main():  # pragma: no cover
    """Command line entry point: parse DSL files, merge them and print JSON.

    Files are parsed and merged one after the other, in the order of the
    arguments. When an adapter is given, the merged data goes through the
    adapter of that name and the result is printed as JSON on stdout.
    """
    cli = argparse.ArgumentParser(
        description='Generates training dataset from a simple DSL.')
    cli.add_argument(
        '--version', action='version', version='%(prog)s v' + __version__)
    cli.add_argument(
        'files', type=str, nargs='+', help='One or more DSL files to process')
    cli.add_argument(
        '-a', '--adapter', type=str, help='Name of the adapter to use')
    # NOTE(review): the help text mentions an options *file* but the value is
    # decoded below as a raw JSON string and given to the adapter - confirm
    # which one is intended.
    cli.add_argument(
        '-m',
        '--merge',
        type=str,
        help='Options file to merge with the final result')
    cli.add_argument('--pretty', action='store_true', help='Pretty output')

    args = cli.parse_args(sys.argv[1:])

    merged = {}

    for path in args.files:
        with open(path, encoding='utf-8') as stream:
            merged = merge(merged, parse(stream.read()))

    # Decoded even when no adapter has been requested.
    options = json.loads(args.merge) if args.merge else {}

    if args.adapter:
        adapter = getattr(adapters, args.adapter)
        merged = adapter(merged, **options)

    indent = 2 if args.pretty else None
    print(json.dumps(merged, indent=indent))
Exemplo n.º 15
0
    def test_it_should_process_empty_entities_which_refer_to_another_one(self):
        result = parse("""
%[my_intent]
  we should go from @[room] to @[anotherRoom]

@[room]
  kitchen
  bedroom

@[anotherRoom](type=room)
""")

        snips_data = snips(result)

        expect(snips_data['entities']).to.have.length_of(1)
        expect(snips_data['entities']).to.have.key('room')

        expect(snips_data['intents']).to.have.length_of(1)
        expect(snips_data['intents']).to.have.key('my_intent')

        intent = snips_data['intents']['my_intent']

        expect(intent).to.have.key('utterances')

        utterances = intent['utterances']

        expect(utterances).to.have.length_of(1)

        data = utterances[0].get('data')

        expect(data).to.have.length_of(4)
        expect(data[0].get('text')).to.equal('we should go from ')
        expect(data[1].get('slot_name')).to.equal('room')
        expect(data[1].get('entity')).to.equal('room')
        expect(data[1].get('text')).to.equal('kitchen')
        expect(data[2].get('text')).to.equal(' to ')
        expect(data[3].get('slot_name')).to.equal('anotherRoom')
        expect(data[3].get('entity')).to.equal('room')
        expect(data[3].get('text')).to.equal('bedroom')
Exemplo n.º 16
0
    def fit_from_skill_data(self, skills: List[str] = None) -> None:
        """Fit the interpreter with every training data registered in the inner TrainingsStore.

        The training DSL of each module is parsed and merged (modules are
        visited sorted by their key), the merged result is converted by the
        adapter named after this interpreter (`self.name`) and finally handed
        to `fit`.

        A module whose DSL cannot be parsed is logged and skipped. When no
        adapter matches `self.name`, a critical message is logged and the
        interpreter is not fitted.

        Args:
          skills (list of str): Optional list of skill names from which we should retrieve
            training data. Used to handle context understanding.

        """
        filtered_module_trainings = self._trainings.all(self.lang)

        if skills:
            filtered_module_trainings = {
                k: v for (k, v) in filtered_module_trainings.items() if k in skills}

        self._logger.info(
            'Merging skill training data from "%d" modules', len(filtered_module_trainings))

        data = {}
        sorted_trainings = sorted(filtered_module_trainings.items(),
                                  key=lambda x: x[0])

        for (module, training_dsl) in sorted_trainings:
            if not training_dsl:
                self._logger.warning('No training data found for "%s"', module)
                continue

            try:
                data = merge(data, parse(training_dsl))
            except Exception as err: # pylint: disable=W0703
                # Best effort: one broken module must not prevent the others
                # from being merged.
                self._logger.error(
                    'Could not parse "%s" training data: "%s"', module, err)

        # Only the lookup is guarded: an AttributeError raised *inside* the
        # adapter is a real bug and must not be reported as a missing adapter.
        try:
            adapter = getattr(adapters, self.name)
        except AttributeError:
            self._logger.critical(
                'No post-processors found on pychatl for this interpreter!')
            return

        self.fit(adapter(data, language=self.lang))
Exemplo n.º 17
0
 def it_should_transform_to_snips_dataset(self, it, dsl, options, expected):
     """Check that `dsl`, parsed then adapted for snips with `options`, equals `expected`."""
     # `it` is not used here - presumably a case label given by the test
     # parametrization; verify against the caller.
     dataset = snips(parse(dsl), **options)
     expect(dataset).to.equal(expected)
Exemplo n.º 18
0
 def it_should_correctly_parse_dsl(self, it, dsl, expected):
     """Check that parsing `dsl` gives exactly `expected`."""
     # `it` is not used here - presumably a case label given by the test
     # parametrization; verify against the caller.
     parsed = parse(dsl)
     expect(parsed).to.equal(expected)
Exemplo n.º 19
0
def get_data():
    """Return the parsed sample training data (lights, vacuum cleaner and forecast intents)."""
    training_dsl = """
%[lights_on]
    turn the @[room]'s lights on would you
    turn lights on in the @[room]
    lights on in @[room] please
    turn on the lights in @[room]
    turn the lights on in @[room]
    enlight me in @[room]

%[lights_off]
    turn the @[room]'s lights off would you
    turn lights off in the @[room]
    lights off in @[room] please
    lights off in @[room] and @[room]
    turn off the lights in @[room]
    turn the lights off in @[room]

%[start_vacuum_cleaner]
    start ~[vacuum_cleaner]
    would you like start ~[vacuum_cleaner] please

%[stop_vacuum_cleaner]
    stop ~[vacuum_cleaner]
    would you like stop ~[vacuum_cleaner] please

%[get_forecast]
    will it be sunny in @[location] at @[date#at]
    what's the weather like in @[location] on @[date#on]
    will it rain in @[location] @[date]
    can we expect a sunny day @[date] in @[location]
    should I take an umbrella in @[location] @[date]
    what kind of weather should I expect at @[date#at] in @[location]
    what will be the weather on @[date#on] in @[location]
    tell me if it is going to rain @[date] in @[location]

~[vacuum_cleaner]
    vacuum cleaner
    vacuum
    vacuuming
    hoover
    hoovering
    aspirator

~[basement]
    cellar

@[room](extensible=false)
    living room
    kitchen
    bedroom
    ~[basement]

@[location]
    los angeles
    paris
    rio de janeiro
    tokyo
    london
    tel aviv
    new york
    saint-étienne du rouvray

@[date](type=datetime)
    tomorrow
    today
    this evening

@[date#at]
    the end of the day
    nine o'clock

@[date#on]
    tuesday
    monday

"""
    return parse(training_dsl)
Exemplo n.º 20
0
    def test_it_should_process_intents(self):
        result = parse("""
%[get_forecast]
  will it rain in @[city] on @[date]
  ~[greet] what's the weather like in @[city#variant]

@[city]
  paris
  rouen

@[city#variant]
  new york
  los angeles

@[date](type=datetime)
  tomorrow

~[greet]
  hi
  hello

""")

        snips_data = snips(result)

        expect(snips_data['intents']).to.have.length_of(1)
        expect(snips_data['intents']).to.have.key('get_forecast')

        intent = snips_data['intents']['get_forecast']

        expect(intent).to.have.key('utterances')

        utterances = intent['utterances']

        expect(utterances).to.have.length_of(3)

        data = utterances[0].get('data')

        expect(data).to.have.length_of(4)
        expect(data[0].get('text')).to.equal('will it rain in ')
        expect(data[1].get('text')).to.equal('paris')
        expect(data[1].get('slot_name')).to.equal('city')
        expect(data[1].get('entity')).to.equal('city')
        expect(data[2].get('text')).to.equal(' on ')
        expect(data[3].get('text')).to.equal('tomorrow')
        expect(data[3].get('slot_name')).to.equal('date')
        expect(data[3].get('entity')).to.equal('snips/datetime')

        data = utterances[1].get('data')

        expect(data).to.have.length_of(3)
        expect(data[0].get('text')).to.equal('hi')
        expect(data[1].get('text')).to.equal(" what's the weather like in ")
        expect(data[2].get('text')).to.equal('new york')
        expect(data[2].get('slot_name')).to.equal('city')
        expect(data[2].get('entity')).to.equal('city')

        data = utterances[2].get('data')

        expect(data).to.have.length_of(3)
        expect(data[0].get('text')).to.equal('hello')
        expect(data[1].get('text')).to.equal(" what's the weather like in ")
        expect(data[2].get('text')).to.equal('los angeles')
        expect(data[2].get('slot_name')).to.equal('city')
        expect(data[2].get('entity')).to.equal('city')