Пример #1
0
            def process_permutation(permutation):
                """Build the sentence parts for one synonym permutation.

                `permutation` holds one chosen value per synonym slot of the
                enclosing `sentence`; an empty string means an optional
                synonym is omitted for this permutation.
                """
                idx = 0

                def reduce_sentence(result, cur):
                    # Non-synonym parts are kept as-is (cloned so the
                    # whitespace stripping below cannot mutate source data).
                    if not is_synonym(cur):
                        return fp.append(fp.clone(cur))(result)

                    nonlocal idx

                    value = permutation[idx]
                    idx += 1

                    # Check if it's not an empty value
                    if value:
                        return fp.append({
                            'type': 'text',
                            'value': value,
                        })(result)

                    return result

                parts = fp.reduce(reduce_sentence)(sentence)
                part_idx = 0

                def reduce_whitespaces_in_part(result, part):
                    cur = fp.clone(part)

                    nonlocal part_idx

                    # First element
                    if part_idx == 0:
                        cur['value'] = cur['value'].lstrip()

                    # Last element or the following one starts with a space.
                    # `startswith` (instead of indexing `[0]`) avoids an
                    # IndexError when the following part has an empty value.
                    if part_idx == (len(parts) - 1) or parts[
                            part_idx + 1]['value'].startswith(' '):
                        cur['value'] = cur['value'].rstrip()

                    part_idx += 1

                    # Drop parts that became empty after stripping
                    if not cur['value']:
                        return result

                    return fp.append(cur)(result)

                # Remove unneeded whitespaces introduced by optional synonyms
                stripped_parts = fp.reduce(reduce_whitespaces_in_part)(parts)

                return stripped_parts
Пример #2
0
        def build_sentence(sentence):
            """Render one sentence to plain text while collecting the spans
            of every entity encountered along the way."""
            entities = []

            def accumulate(text_so_far, part):
                # Plain text parts are concatenated verbatim.
                if not utils.is_entity(part):
                    return text_so_far + part.get('value')

                entity_name = get_real_entity(part.get('value'))
                value = augment.get_entity(entity_name).next(
                    part.get('variant'))

                # Record the span of the rendered entity value; when the
                # value is a synonym, store its canonical value instead.
                entities.append({
                    'start': len(text_so_far),
                    'end': len(text_so_far) + len(value),
                    'entity': part.get('value'),
                    'value': synonyms_lookup.get(value, value),
                })

                return text_so_far + value

            return {
                'intent': name,
                'text': fp.reduce(accumulate, '')(sentence),
                'entities': entities,
            }
Пример #3
0
    def build_entity_synonyms(acc, _, name):
        """Reducer appending a {value, synonyms} record to `acc` for every
        value of the named entity that actually defines synonyms."""
        def collect(result, value):
            value_synonyms = augment.get_synonyms(value)

            # Values without any synonym are skipped entirely.
            if not value_synonyms:
                return result

            return fp.append({
                'value': value,
                'synonyms': value_synonyms,
            })(result)

        return fp.append(
            *fp.reduce(collect)(augment.get_entity(name).all()))(acc)
Пример #4
0
def snips(chatl, **options):
    """Transform a chatl dataset to a snips representation as per
    https://snips-nlu.readthedocs.io/en/0.19.1/dataset.html

    Args:
      chatl: parsed chatl dataset to convert.
      options: extra keys merged into the resulting snips dict
        (e.g. to override 'language').

    Returns:
      dict: snips NLU dataset representation.
    """
    augment = Augment(chatl)

    def get_entity_type(entity):
        """Return the entity type declared in props ('type' wins over
        'snips:type'), prefixing built-in types with the snips prefix."""
        ent_type = entity.get('props', {}).get('type') or entity.get(
            'props', {}).get('snips:type')

        # If the type is not present in the dataset, let's consider it's a
        # built-in one.
        if ent_type and not augment.entities.get(ent_type):
            return SNIPS_PREFIX + ent_type if SNIPS_PREFIX not in ent_type else ent_type

        return ent_type

    def build_entity(acc, entity, name):
        """Reducer building the snips 'entities' map."""
        ent_type = get_entity_type(entity)

        if ent_type:
            if SNIPS_PREFIX in ent_type:
                # Built-in entities carry no custom data.
                return fp.append({
                    ent_type: {},
                })(acc)

            # It has a type present in the dataset, it should be considered as a slot
            return acc

        use_synonyms = False

        def build_entity_value(ent_name):
            # Track whether at least one value has synonyms so the
            # entity-level 'use_synonyms' flag can be set accordingly.
            nonlocal use_synonyms
            synonyms = augment.get_synonyms(ent_name)
            use_synonyms = use_synonyms or len(synonyms) > 0
            return {
                'value': ent_name,
                'synonyms': synonyms,
            }

        values = fp.map(build_entity_value)(augment.get_entity(name).all())

        # Use .get('props', {}) — consistent with get_entity_type — so an
        # entity that defines no props at all does not raise a KeyError.
        props = entity.get('props', {})

        return fp.append({
            name: {
                'data':
                values,
                'automatically_extensible':
                props.get('extensible', 'true') == 'true',
                'matching_strictness':
                float(props.get('strictness', '1')),
                'use_synonyms':
                use_synonyms,
            },
        })(acc)

    def build_sentence_part(part):
        """Convert one chatl sentence part to snips utterance data."""
        part_value = part.get('value')

        if not utils.is_entity(part):
            return {'text': part_value}

        entity = augment.entities.get(part_value)
        # Retrieve the inner type of the entity if defined in the dataset
        ent_type = get_entity_type(entity) or part_value
        # And check if it references another defined entity because if it's true,
        # values will be fetched from here
        referenced_entity = ent_type if augment.entities.get(
            ent_type) else part_value

        return {
            'entity':
            ent_type,
            'slot_name':
            part_value,
            'text':
            augment.get_entity(referenced_entity).next(part.get('variant')),
        }

    def build_intents(intent):
        """Map one chatl intent to its snips 'utterances' list."""
        return {
            'utterances':
            fp.map(lambda sentence: {
                'data': fp.map(build_sentence_part)(sentence),
            })(intent.get('data', [])),
        }

    return utils.merge(
        {
            'language': 'en',
            'intents': fp.map(build_intents)(augment.get_intents()),
            'entities': fp.reduce(build_entity)(augment.entities),
        }, options)
Пример #5
0
    def test_it_should_correctly_transform_data(self):
        """Table-driven checks for the `fp` helper module: each case pairs a
        transformation ('given'), an input ('with') and the expected output
        ('expected'); cases are yielded as generator-style sub-tests."""
        tests = [
            {
                'it': 'should provide a function which always returns the given value',
                'given': lambda d: fp.always(5)(d),
                'with': 1,
                'expected': 5,
            },
            {
                'it': 'should provide a function to extract a prop from an object',
                'given': lambda o: fp.prop('value')(o),
                'with': {'some': 'thing', 'value': 'five'},
                'expected': 'five',
            },
            {
                'it': 'should provide a function to instantiate a class',
                'given': lambda d: fp.instantiate(SayHello)(d),
                'with': 'jean',
                'expected': SayHello('jean'),
            },
            {
                'it': 'should instantiate a class with additional parameters if any',
                'given': lambda d: fp.instantiate(SayHello, 'other value')(d),
                'with': 'jean',
                'expected': SayHello('jean', 'other value'),
            },
            {
                'it': 'should provide a function to map on an array',
                'given': lambda d: fp.map(lambda s: s.upper())(d),
                'with': ['one', 'two', 'three'],
                'expected': ['ONE', 'TWO', 'THREE'],
            },
            {
                'it': 'should map on object values if given an object',
                'given': lambda d: fp.map(lambda s: s.upper())(d),
                'with': {'a': 'one', 'b': 'two', 'c': 'three'},
                'expected': {'a': 'ONE', 'b': 'TWO', 'c': 'THREE'},
            },
            {
                # fp.reduce without an accumulator defaults to an empty list.
                'it': 'should provide a function to reduce an array',
                'given': lambda d: fp.reduce(lambda p, c: (p.append(c) or p) if c > 5 else p)(d),
                'with': [1, 2, 3, 4, 5, 6, 7],
                'expected': [6, 7],
            },
            {
                # When reducing a dict, the reducer receives (acc, value, key).
                'it': 'should reduce an object too',
                'given': lambda o: fp.reduce(lambda p, c, key: (p.update({
                    key: c,
                }) or p) if c > 5 else p)(o),
                'with': {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7},
                'expected': {'f': 6, 'g': 7},
            },
            {
                'it': 'should reduce with a given accumulator',
                'given': lambda d: fp.reduce(lambda p, c: p.update({c: c}) or p, {})(d),
                'with': [1, 2, 3],
                'expected': {1: 1, 2: 2, 3: 3},
            },
            {
                'it': 'should provide a way to pipe functions',
                'given': lambda d: fp.pipe(fp.always('test'), lambda s: s.upper())(d),
                'with': 'something',
                'expected': 'TEST',
            },
            {
                'it': 'should provide a way to flatten an array',
                'given': lambda d: fp.flatten(d),
                'with': [[1, 2], [3, 4]],
                'expected': [1, 2, 3, 4],
            },
            {
                'it': 'should flatten an object too',
                'given': lambda d: fp.flatten(d),
                'with': {'a': [1, 2], 'b': [3, 4]},
                'expected': [1, 2, 3, 4],
            },
            {
                'it': 'should append to an array',
                'given': lambda d: fp.append(3, 4)(d),
                'with': [1, 2],
                'expected': [1, 2, 3, 4],
            },
            {
                'it': 'should append to an object',
                'given': lambda d: fp.append({'some': 'value'}, {'else': 'too'})(d),
                'with': {'an': 'object'},
                'expected': {'an': 'object', 'some': 'value', 'else': 'too'},
            },
            {
                'it': 'should filter array elements',
                'given': lambda d: fp.filter(lambda a: (a % 2) == 0)(d),
                'with': [1, 2, 3, 4],
                'expected': [2, 4],
            },
            {
                'it': 'should clone an object',
                'given': lambda o: fp.clone(o),
                'with': {'an': 'object', 'with': {'nested': 'prop'}},
                'expected': {'an': 'object', 'with': {'nested': 'prop'}},
            },
        ]

        # Yield one sub-test per case (nose-style generator test —
        # TODO confirm the runner; pytest>=4 no longer collects these).
        for test in tests:
            yield self.it_should_correctly_transform_data, \
                test['it'], test['given'], test['with'], test['expected']
Пример #6
0
def rasa(chatl, **options):
    """Convert a chatl dataset to a rasa representation as per
    https://rasa.com/docs/rasa/1.1.4/nlu/training-data-format/

    Args:
      chatl: parsed chatl dataset to convert.
      options: extra keys merged into the resulting rasa dict.

    Returns:
      dict: rasa NLU training data representation.
    """
    # NOTE(review): second argument presumably toggles augmentation behavior
    # — confirm against the Augment constructor.
    augment = Augment(chatl, True)

    def get_real_entity(name):
        # Follow an entity's 'type' prop when it points to another entity
        # defined in the dataset; otherwise return the name unchanged.
        entity_type = augment.entities.get(name, {}).get('props',
                                                         {}).get('type')

        if entity_type and entity_type in augment.entities:
            return entity_type

        return name

    def get_regex_prop(name):
        # Return the 'regex' prop of the named entity, or None.
        return augment.entities.get(name, {}).get('props', {}).get('regex')

    # For rasa, we need a map of synonyms -> value
    # (fp.reduce over a dict calls the reducer with (acc, value, key); here
    # the dict maps canonical value -> list of synonyms, hence the names.)
    synonyms_lookup = fp.reduce(lambda acc, synonyms, value: fp.append(*fp.map(
        lambda s: {s: value})(synonyms))(acc))(augment.synonyms_values)

    def build_lookup_table(acc, _, name):
        """Reducer building rasa 'lookup_tables' from dataset entities."""
        entity_name = get_real_entity(name)

        # Entity has regex feature, returns now
        if get_regex_prop(entity_name):
            return acc

        return fp.append({
            'name': name,
            'elements': augment.get_entity(entity_name).all(),
        })(acc)

    def build_intent_examples(acc, intent, name):
        """Reducer building rasa 'common_examples' for one intent."""
        def build_sentence(sentence):
            # Spans of entities rendered while flattening the sentence.
            entities = []

            def reduce_sentence(result, cur):
                # Plain text parts are concatenated verbatim.
                if not utils.is_entity(cur):
                    return result + cur.get('value')

                entity_name = get_real_entity(cur.get('value'))
                value = augment.get_entity(entity_name).next(
                    cur.get('variant'))

                # NOTE(review): nonlocal is not strictly required here —
                # the list is only mutated, never rebound.
                nonlocal entities

                entities.append({
                    'start': len(result),
                    'end': len(result) + len(value),
                    'entity': cur.get('value'),
                    # Check if its a synonym here
                    'value': synonyms_lookup.get(value, value),
                })

                return result + value

            return {
                'intent': name,
                'text': fp.reduce(reduce_sentence, '')(sentence),
                'entities': entities,
            }

        return fp.append(*fp.map(build_sentence)(intent.get('data', [])))(acc)

    def build_entity_synonyms(acc, _, name):
        """Reducer building rasa 'entity_synonyms' records."""
        def reduce_entity(result, cur):
            synonyms = augment.get_synonyms(cur)

            # Values without synonyms are skipped.
            if not synonyms:
                return result

            return fp.append({
                'value': cur,
                'synonyms': synonyms,
            })(result)

        return fp.append(
            *fp.reduce(reduce_entity)(augment.get_entity(name).all()))(acc)

    def build_regex_features(acc, _, name):
        """Reducer building rasa 'regex_features' from entities with a
        'regex' prop."""
        pattern = get_regex_prop(get_real_entity(name))

        if pattern:
            return fp.append({
                'name': name,
                'pattern': pattern,
            })(acc)

        return acc

    return utils.merge(
        {
            'rasa_nlu_data': {
                'common_examples':
                fp.reduce(build_intent_examples, [])(augment.get_intents()),
                'regex_features':
                fp.reduce(build_regex_features, [])(augment.entities),
                'lookup_tables':
                fp.reduce(build_lookup_table, [])(augment.entities),
                'entity_synonyms':
                fp.reduce(build_entity_synonyms, [])(augment.entities),
            },
        }, options)
Пример #7
0
        def process_sentence_data(acc, sentence):
            """Expand a sentence into one sentence per synonym permutation.

            Sentences without synonym parts are appended unchanged. For
            sentences containing synonyms, every combination of synonym
            values (plus an empty entry for optional synonyms) yields a new
            sentence with surrounding whitespace cleaned up.
            """
            sentence_synonyms = fp.filter(is_synonym)(sentence)

            # No synonyms, just returns now
            if not sentence_synonyms:
                return fp.append(sentence)(acc)

            # Get all synonyms values to generate permutations
            # For optional synonyms, add an empty entry.
            def reduce_synonyms(result, synonym_data):
                return fp.append(
                    ([''] if synonym_data.get('optional') else []) +
                    self.get_synonyms(synonym_data['value']))(result)

            synonyms_data = fp.reduce(reduce_synonyms)(sentence_synonyms)

            def process_permutation(permutation):
                """Build the sentence parts for one synonym permutation."""
                idx = 0

                def reduce_sentence(result, cur):
                    # Non-synonym parts are kept as-is (cloned so the
                    # whitespace stripping below cannot mutate source data).
                    if not is_synonym(cur):
                        return fp.append(fp.clone(cur))(result)

                    nonlocal idx

                    value = permutation[idx]
                    idx += 1

                    # Check if it's not an empty value
                    if value:
                        return fp.append({
                            'type': 'text',
                            'value': value,
                        })(result)

                    return result

                parts = fp.reduce(reduce_sentence)(sentence)
                part_idx = 0

                def reduce_whitespaces_in_part(result, part):
                    cur = fp.clone(part)

                    nonlocal part_idx

                    # First element
                    if part_idx == 0:
                        cur['value'] = cur['value'].lstrip()

                    # Last element or the following one starts with a space.
                    # `startswith` (instead of indexing `[0]`) avoids an
                    # IndexError when the following part has an empty value.
                    if part_idx == (len(parts) - 1) or parts[
                            part_idx + 1]['value'].startswith(' '):
                        cur['value'] = cur['value'].rstrip()

                    part_idx += 1

                    # Drop parts that became empty after stripping
                    if not cur['value']:
                        return result

                    return fp.append(cur)(result)

                # Remove unneeded whitespaces introduced by optional synonyms
                stripped_parts = fp.reduce(reduce_whitespaces_in_part)(parts)

                return stripped_parts

            # One expanded sentence per element of the cartesian product of
            # all synonym value lists.
            return fp.append(
                *fp.map(process_permutation)(list(product(
                    *synonyms_data))))(acc)
Пример #8
0
 def process_intent_data(intent_data):
     """Return the intent data with its sentences expanded for every
     synonym permutation (delegating to process_sentence_data)."""
     expanded_sentences = fp.reduce(process_sentence_data)(
         intent_data.get('data', []))

     return fp.append({
         'data': expanded_sentences,
     })(intent_data)