Exemplo n.º 1
0
    def test_create(self):
        # Words added to a fresh Dictionary must come back from get_word()
        # when looked up with the strings below.
        dictionary = Dictionary()
        expected_by_key = (
            (u'обезьянка', self.monkey),
            (u'глупый', self.silly),
            (u'ударил', self.hit),
        )

        for _, word in expected_by_key:
            dictionary.add_word(word)

        for key, word in expected_by_key:
            self.assertEqual(dictionary.get_word(key), word)
Exemplo n.º 2
0
 def setUp(self):
     """Create ``self.dictionary`` and fill it with the words the tests rely on."""
     # (word class, string handed to create_from_baseword), in insertion order.
     fixture_words = (
         (Noun, u'обезьянка'),
         (Noun, u'тень'),
         (Adjective, u'глупый'),
         (Verb, u'ударил'),
         (Verb, u'подставил'),
         (Adjective, u'целый'),
     )

     self.dictionary = Dictionary()
     for word_class, base in fixture_words:
         self.dictionary.add_word(word_class.create_from_baseword(morph, base))
Exemplo n.º 3
0
    def test_create(self):
        # Fill an empty Dictionary with the three fixture words, then check
        # that get_word() hands back the very same objects.
        dictionary = Dictionary()
        for word in (self.monkey, self.silly, self.hit):
            dictionary.add_word(word)

        lookup = dictionary.get_word
        self.assertEqual(lookup(u'обезьянка'), self.monkey)
        self.assertEqual(lookup(u'глупый'), self.silly)
        self.assertEqual(lookup(u'ударил'), self.hit)
Exemplo n.º 4
0
 def setUp(self):
     """Prepare ``self.dictionary`` with the nouns, adjectives and verbs used by the tests."""
     self.dictionary = Dictionary()
     register = self.dictionary.add_word

     # Nouns
     register(Noun.create_from_baseword(morph, u'обезьянка'))
     register(Noun.create_from_baseword(morph, u'тень'))
     # Adjective / verbs / adjective, kept in the original insertion order
     register(Adjective.create_from_baseword(morph, u'глупый'))
     register(Verb.create_from_baseword(morph, u'ударил'))
     register(Verb.create_from_baseword(morph, u'подставил'))
     register(Adjective.create_from_baseword(morph, u'целый'))
Exemplo n.º 5
0
class TemplateTest(TestCase):
    """Checks Template.create() / Template.substitute() output against a small word fixture."""

    def setUp(self):
        # Dictionary shared by all tests; entries are added in this order.
        fixture_words = (
            (Noun, u'обезьянка'),
            (Noun, u'тень'),
            (Adjective, u'глупый'),
            (Verb, u'ударил'),
            (Verb, u'подставил'),
            (Adjective, u'целый'),
        )
        self.dictionary = Dictionary()
        for word_class, base in fixture_words:
            self.dictionary.add_word(word_class.create_from_baseword(morph, base))

    def _render(self, pattern, externals):
        # Parse ``pattern`` and substitute ``externals`` using the fixture dictionary.
        return Template.create(morph, pattern).substitute(self.dictionary, externals)

    def test_externals(self):
        # External word with form modifiers given inside the placeholder.
        self.assertEqual(
            self._render(u'ударить [[hero|вн]]', {'hero': u'обезьянка'}),
            u'ударить обезьянку')

        self.assertEqual(
            self._render(u'ударить [[hero|вн,мн]]', {'hero': u'обезьянка'}),
            u'ударить обезьянок')

    def test_partial_dependence(self):
        # Part of the form comes from the template, part from the passed tuple.
        self.assertEqual(
            self._render(u'ударить [[hero|вн]]', {'hero': (u'обезьянка', u'мн')}),
            u'ударить обезьянок')

    def test_internals(self):
        # A word written directly in the template, dependent on an external.
        self.assertEqual(
            self._render(u'[{тень|hero|тв}] [[hero|рд]]', {'hero': (u'обезьянка', u'мн')}),
            u'тенями обезьянок')

    def test_noun_dependences(self):
        # One external noun depending on another external.
        externals = {'hero': (u'обезьянка', u'мн'),
                     'shadow': u'тень'}
        self.assertEqual(
            self._render(u'[[shadow|hero|тв]] [[hero|рд]]', externals),
            u'тенями обезьянок')

    def test_numeral_1_dependences(self):
        # Noun forms selected by the number 1.
        self.assertEqual(
            self._render(u'[[number||]] [[hero|number|им]]',
                         {'hero': u'обезьянка', 'number': 1}),
            u'1 обезьянка')

        self.assertEqual(
            self._render(u'[[number||]] [[hero|number|рд]]',
                         {'hero': u'обезьянка', 'number': 1}),
            u'1 обезьянки')

        self.assertEqual(
            self._render(u'[[number||]] [[hero|number|дт]]',
                         {'hero': u'обезьянка', 'number': 1}),
            u'1 обезьянке')

    def test_numeral_2_dependences(self):
        # Noun forms selected by the number 2.
        self.assertEqual(
            self._render(u'[[number||]] [[hero|number|им]]',
                         {'hero': u'обезьянка', 'number': 2}),
            u'2 обезьянки')

        self.assertEqual(
            self._render(u'[[number||]] [[hero|number|рд]]',
                         {'hero': u'обезьянка', 'number': 2}),
            u'2 обезьянок')

        self.assertEqual(
            self._render(u'[[number||]] [[hero|number|дт]]',
                         {'hero': u'обезьянка', 'number': 2}),
            u'2 обезьянкам')

    def test_numeral_13_adj_dependences(self):
        # Adjective form selected by the number 13.
        self.assertEqual(
            self._render(u'[[number||]] [{целый|number|}]', {'number': 13}),
            u'13 целых')

    def test_numeral_5_dependences(self):
        # Noun forms selected by the number 5.
        self.assertEqual(
            self._render(u'[[number||]] [[hero|number|им]]',
                         {'hero': u'обезьянка', 'number': 5}),
            u'5 обезьянок')

        self.assertEqual(
            self._render(u'[[number||]] [[hero|number|рд]]',
                         {'hero': u'обезьянка', 'number': 5}),
            u'5 обезьянок')

        self.assertEqual(
            self._render(u'[[number||]] [[hero|number|дт]]',
                         {'hero': u'обезьянка', 'number': 5}),
            u'5 обезьянкам')

    def test_dependences(self):
        # Adjective and verb forms, with and without a dependency on ``hero``.
        self.assertEqual(
            self._render(u'[{глупый|hero|рд}] [[hero|рд]]', {'hero': u'обезьянка'}),
            u'глупой обезьянки')

        self.assertEqual(
            self._render(u'враг [{ударил|hero|буд,3л}] [[hero|вн]]', {'hero': u'обезьянка'}),
            u'враг ударит обезьянку')

        self.assertEqual(
            self._render(u'крыса [{ударил|прш,жр}] [[hero|вн]]', {'hero': u'обезьянка'}),
            u'крыса ударила обезьянку')

    def test_fake_substitutions(self):
        # A Fake value is expected to appear verbatim in the rendered text.
        # Written out explicitly so the template is created before the Fake.
        template = Template.create(morph, u'[{глупый|hero|рд}] [[hero|рд]]')
        rendered = template.substitute(self.dictionary, {'hero': Fake(u'19x5')})
        self.assertEqual(rendered, u'глупого 19x5')

    def test_upper_case(self):
        # Capitalisation requested from the template and from the passed tuple.
        pattern = u'Первое предложение. [{подставил|hero|прш,загл}] слово в начало, а затем вставим имя от [[hero|рд]]. [[shadow|загл]] пришла.'
        result = self._render(pattern, {'hero': (u'обезьянка', u'загл'), 'shadow': u'тень'})
        self.assertEqual(result, u'Первое предложение. Подставила слово в начало, а затем вставим имя от Обезьянки. Тень пришла.')
Exemplo n.º 6
0
    def test_serialization(self):
        # Round trip: words saved to a storage file must be readable from a
        # brand-new Dictionary loaded from that same file.
        import os

        # The temporary file only reserves a unique path. delete=False keeps
        # it on disk once the handle is closed, so save()/load() can be given
        # the path by name without an open handle in the way.
        with tempfile.NamedTemporaryFile(delete=False) as f:
            storage = f.name

        try:
            dictionary = Dictionary()

            dictionary.add_word(self.monkey)
            dictionary.add_word(self.silly)
            dictionary.add_word(self.hit)

            dictionary.save(storage=storage)

            dictionary = Dictionary()
            dictionary.load(storage=storage)

            self.assertEqual(dictionary.get_word(u'обезьянка').normalized, u'обезьянка')
            self.assertEqual(dictionary.get_word(u'глупый').normalized, u'глупый')
            self.assertEqual(dictionary.get_word(u'ударил').normalized, u'ударил')
        finally:
            # With delete=False nothing else cleans the file up, so remove it
            # here whether the assertions passed or not.
            if os.path.exists(storage):
                os.remove(storage)
Exemplo n.º 7
0
def _describe_render_mismatch(template_phrase, rendered, expected):
    """Return the error text explaining how ``rendered`` differs from ``expected``."""
    for position, (got, wanted) in enumerate(zip(rendered, expected)):
        if got != wanted:
            return '''
wrong test_render for phrase "%s"

prefix: "%s"

diff: %s|%s''' % (template_phrase, rendered[:position], got, wanted)

    # No character differs within the shared length, so the strings differ in
    # length only. Report the tails starting at the last shared character, as
    # before. The offset is clamped to 0 because the previous code reused the
    # loop index here, which was unbound (or stale from an earlier phrase)
    # whenever one of the strings was empty.
    tail_start = max(min(len(rendered), len(expected)) - 1, 0)
    return 'different len: "%s"|"%s"' % (rendered[tail_start:], expected[tail_start:])


def import_texts(morph, source_dir, tech_vocabulary_path, voc_storage, dict_storage, tmp_dir='/tmp', check=False):
    """Import phrase templates from the ``*.json`` files found in ``source_dir``.

    Each file is parsed as JSON and must provide ``prefix`` (equal to the file
    name without ``.json``), ``types`` and ``variables_verbose``; ``variables``
    is optional. A type is either a list of phrases or a dict with ``phrases``
    and optional ``variables``. Every phrase is a ``(template, expected text)``
    pair: the template is registered in the vocabulary, the words it needs are
    added to the dictionary, and its rendering is compared with the expected
    text after both went through ``efication``.

    Parameters:
        morph: passed through to ``Template.create`` and
            ``WordBase.create_from_string``.
        source_dir: directory scanned (non-recursively) for ``.json`` files.
        tech_vocabulary_path: passed to ``get_tech_vocabulary``.
        voc_storage: vocabulary storage; loaded if it exists (only when
            ``check`` is false) and saved at the end (only when ``check`` is
            false).
        dict_storage: dictionary storage; loaded if it exists and saved at the
            end (only when ``check`` is false).
        tmp_dir: directory for the per-file marker files used in check mode.
        check: when true, a file whose marker in ``tmp_dir`` is newer than the
            file itself is skipped, a marker is written after each processed
            file, and nothing is saved.

    Returns:
        ``{'modules': {prefix: get_user_data_for_module(data)}}`` with one
        entry per processed file.

    Raises:
        Exception: file name differs from ``prefix``, a type name is empty, or
            a variable has no entry in ``variables_verbose``.
        TextgenException: a rendered phrase differs from its expected text.
    """
    from textgen.templates import Dictionary, Vocabulary, Template
    from textgen.words import WordBase

    vocabulary = Vocabulary()

    user_data = {'modules': {}}

    if not check and os.path.exists(voc_storage):
        vocabulary.load(storage=voc_storage)

    dictionary = Dictionary()
    if os.path.exists(dict_storage):
        dictionary.load(storage=dict_storage)

    tech_vocabulary = get_tech_vocabulary(tech_vocabulary_path)

    for tech_word in tech_vocabulary.keys():
        dictionary.add_word(WordBase.create_from_string(morph, tech_word.strip(), tech_vocabulary))

    for filename in os.listdir(source_dir):

        if not filename.endswith('.json'):
            continue

        texts_path = os.path.join(source_dir, filename)

        if not os.path.isfile(texts_path):
            continue

        group = filename[:-5]  # file name without the '.json' suffix

        if check:
            check_path = os.path.join(tmp_dir, 'textgen-files-check-' + filename)

            # A marker newer than the source file means it was already checked.
            if os.path.exists(check_path) and os.path.getmtime(check_path) > os.path.getmtime(texts_path):
                print('group "%s" has been already processed' % group)
                continue

        print('load "%s"' % group)

        # Only parsing needs the file; close it before the processing below.
        with open(texts_path) as f:
            data = json.load(f)

        if group != data['prefix']:
            raise Exception('filename MUST be equal to prefix')

        for suffix in data['types']:
            if suffix == '':
                raise Exception('type MUST be not equal to empty string')

        user_data['modules'][data['prefix']] = get_user_data_for_module(data)

        variables_verbose = data['variables_verbose']

        global_variables = data.get('variables', {})

        for variable_name in global_variables.keys():
            if not variables_verbose.get(variable_name):
                raise Exception('no verbose name for variable "%s"' % variable_name)

        for suffix, type_ in data['types'].items():
            phrase_key = '%s_%s' % (group, suffix)

            vocabulary.register_type(phrase_key)

            if isinstance(type_, list):
                phrases = type_
                local_variables = {}
            else:
                phrases = type_['phrases']
                local_variables = type_.get('variables', {})

            for variable_name in local_variables.keys():
                if not variables_verbose.get(variable_name):
                    raise Exception('no verbose name for variable "%s"' % variable_name)

            # Type-level variables override the file-level ones.
            variables = copy.copy(global_variables)
            variables.update(local_variables)

            for phrase in phrases:
                template_phrase, test_phrase = phrase

                template = Template.create(morph, template_phrase, available_externals=variables.keys(), tech_vocabulary=tech_vocabulary)

                vocabulary.add_phrase(phrase_key, template)

                # Every non-numeric variable value is a word the test
                # rendering needs in the dictionary.
                for value in variables.values():
                    if isinstance(value, numbers.Number):
                        continue
                    dictionary.add_word(WordBase.create_from_string(morph, value, tech_vocabulary))

                for string in template.get_internal_words():
                    dictionary.add_word(WordBase.create_from_string(morph, string, tech_vocabulary))

                test_result = template.substitute(dictionary, variables)

                test_result_normalized = efication(test_result)
                test_phrase_normalized = efication(test_phrase)

                if test_result_normalized != test_phrase_normalized:
                    raise TextgenException(_describe_render_mismatch(template_phrase,
                                                                     test_result_normalized,
                                                                     test_phrase_normalized))

        if check:
            with open(check_path, 'w') as f:
                f.write('1')

    if not check:
        vocabulary.save(storage=voc_storage)
        dictionary.save(storage=dict_storage)

    return user_data
Exemplo n.º 8
0
def import_texts(morph,
                 source_dir,
                 tech_vocabulary_path,
                 voc_storage,
                 dict_storage,
                 tmp_dir='/tmp',
                 check=False):
    """Load, validate and register the phrase files stored in ``source_dir``.

    Only regular files ending in ``.json`` are processed. The parsed JSON is
    read through the keys ``prefix`` (must equal the file name without the
    extension), ``types``, ``variables_verbose`` and the optional
    ``variables``. Each type is a list of phrases, or a dict holding
    ``phrases`` plus optional ``variables``. Each phrase unpacks into a
    template string and the text it is expected to render to.

    Parameters:
        morph: forwarded to ``Template.create`` and
            ``WordBase.create_from_string``.
        source_dir: directory listed with ``os.listdir``.
        tech_vocabulary_path: forwarded to ``get_tech_vocabulary``.
        voc_storage: storage for the vocabulary (loaded and saved only when
            ``check`` is false).
        dict_storage: storage for the dictionary (always loaded if present,
            saved only when ``check`` is false).
        tmp_dir: where check-mode marker files are kept.
        check: validate only. Files with a marker newer than themselves are
            skipped and a marker is written after each processed file.

    Returns:
        dict with a single key ``'modules'`` mapping every processed prefix to
        the result of ``get_user_data_for_module``.

    Raises:
        Exception: prefix/file-name mismatch, empty type name, or a variable
            without a verbose name.
        TextgenException: the rendered test phrase does not match the expected
            one (compared after ``efication``).
    """
    from textgen.templates import Dictionary, Vocabulary, Template
    from textgen.words import WordBase

    vocabulary = Vocabulary()

    user_data = {'modules': {}}

    if not check:
        if os.path.exists(voc_storage):
            vocabulary.load(storage=voc_storage)

    dictionary = Dictionary()
    if os.path.exists(dict_storage):
        dictionary.load(storage=dict_storage)

    tech_vocabulary = get_tech_vocabulary(tech_vocabulary_path)

    for word in tech_vocabulary.keys():
        word = WordBase.create_from_string(morph, word.strip(),
                                           tech_vocabulary)
        dictionary.add_word(word)

    for filename in os.listdir(source_dir):

        if not filename.endswith('.json'):
            continue

        texts_path = os.path.join(source_dir, filename)

        if not os.path.isfile(texts_path):
            continue

        # Strip the '.json' suffix.
        group = filename[:-5]

        if check:
            check_path = os.path.join(tmp_dir,
                                      'textgen-files-check-' + filename)

            # The marker is newer than the source: nothing changed since the
            # last successful check.
            if os.path.exists(check_path) and os.path.getmtime(
                    check_path) > os.path.getmtime(texts_path):
                print('group "%s" has been already processed' % group)
                continue

        print('load "%s"' % group)

        with open(texts_path) as f:
            data = json.loads(f.read())

            if group != data['prefix']:
                raise Exception('filename MUST be equal to prefix')

            for suffix in data['types']:
                if suffix == '':
                    raise Exception('type MUST be not equal to empty string')

            user_data['modules'][data['prefix']] = get_user_data_for_module(
                data)

            variables_verbose = data['variables_verbose']

            global_variables = data.get('variables', {})

            for variable_name in global_variables.keys():
                if not variables_verbose.get(variable_name):
                    raise Exception('no verbose name for variable "%s"' %
                                    variable_name)

            for suffix, type_ in data['types'].items():
                phrase_key = '%s_%s' % (group, suffix)

                vocabulary.register_type(phrase_key)

                if isinstance(type_, list):
                    phrases = type_
                    local_variables = {}
                else:
                    phrases = type_['phrases']
                    local_variables = type_.get('variables', {})

                for variable_name in local_variables.keys():
                    if not variables_verbose.get(variable_name):
                        raise Exception('no verbose name for variable "%s"' %
                                        variable_name)

                # Variables declared on the type take precedence over the
                # file-level ones.
                variables = copy.copy(global_variables)
                variables.update(local_variables)

                for phrase in phrases:
                    template_phrase, test_phrase = phrase

                    template = Template.create(
                        morph,
                        template_phrase,
                        available_externals=variables.keys(),
                        tech_vocabulary=tech_vocabulary)

                    vocabulary.add_phrase(phrase_key, template)

                    # Numbers are substituted as-is; any other value is a
                    # word that has to be present in the dictionary.
                    for value in variables.values():
                        if isinstance(value, numbers.Number):
                            continue
                        word = WordBase.create_from_string(
                            morph, value, tech_vocabulary)
                        dictionary.add_word(word)

                    for string in template.get_internal_words():
                        word = WordBase.create_from_string(
                            morph, string, tech_vocabulary)
                        dictionary.add_word(word)

                    test_result = template.substitute(dictionary, variables)

                    test_result_normalized = efication(test_result)
                    test_phrase_normalized = efication(test_phrase)

                    if test_result_normalized != test_phrase_normalized:
                        common_len = min(len(test_result_normalized),
                                         len(test_phrase_normalized))

                        # Index of the first differing character, or None
                        # when only the lengths differ.
                        mismatch_at = next(
                            (i for i in range(common_len)
                             if test_result_normalized[i] !=
                             test_phrase_normalized[i]), None)

                        if mismatch_at is None:
                            # Start the tails at the last shared character, as
                            # before. The offset is clamped to 0 because the
                            # old code reused the loop index, which was
                            # unbound or stale when either string was empty.
                            tail_start = max(common_len - 1, 0)
                            msg = 'different len: "%s"|"%s"' % (
                                test_result_normalized[tail_start:],
                                test_phrase_normalized[tail_start:])
                        else:
                            msg = '''
wrong test_render for phrase "%s"

prefix: "%s"

diff: %s|%s''' % (template_phrase,
                  test_result_normalized[:mismatch_at],
                  test_result_normalized[mismatch_at],
                  test_phrase_normalized[mismatch_at])

                        raise TextgenException(msg)

        if check:
            with open(check_path, 'w') as f:
                f.write('1')

    if not check:
        vocabulary.save(storage=voc_storage)
        dictionary.save(storage=dict_storage)

    return user_data
Exemplo n.º 9
0
class TemplateTest(TestCase):
    """Rendering tests for Template built on a six-word fixture dictionary."""

    def setUp(self):
        # Populate the dictionary every test substitutes against.
        self.dictionary = Dictionary()
        register = self.dictionary.add_word

        register(Noun.create_from_baseword(morph, u'обезьянка'))
        register(Noun.create_from_baseword(morph, u'тень'))
        register(Adjective.create_from_baseword(morph, u'глупый'))
        register(Verb.create_from_baseword(morph, u'ударил'))
        register(Verb.create_from_baseword(morph, u'подставил'))
        register(Adjective.create_from_baseword(morph, u'целый'))

    def test_externals(self):
        # Same external value, different modifiers in the placeholder.
        cases = (
            (u'ударить [[hero|вн]]', u'ударить обезьянку'),
            (u'ударить [[hero|вн,мн]]', u'ударить обезьянок'),
        )
        for pattern, expected in cases:
            template = Template.create(morph, pattern)
            externals = {'hero': u'обезьянка'}
            self.assertEqual(
                template.substitute(self.dictionary, externals), expected)

    def test_partial_dependence(self):
        # Modifiers come both from the template and from the passed tuple.
        template = Template.create(morph, u'ударить [[hero|вн]]')
        externals = {'hero': (u'обезьянка', u'мн')}
        rendered = template.substitute(self.dictionary, externals)
        self.assertEqual(rendered, u'ударить обезьянок')

    def test_internals(self):
        # Word spelled inside the template, depending on the external.
        template = Template.create(morph, u'[{тень|hero|тв}] [[hero|рд]]')
        externals = {'hero': (u'обезьянка', u'мн')}
        rendered = template.substitute(self.dictionary, externals)
        self.assertEqual(rendered, u'тенями обезьянок')

    def test_noun_dependences(self):
        # External noun depending on another external.
        template = Template.create(morph, u'[[shadow|hero|тв]] [[hero|рд]]')
        externals = {
            'hero': (u'обезьянка', u'мн'),
            'shadow': u'тень'
        }
        rendered = template.substitute(self.dictionary, externals)
        self.assertEqual(rendered, u'тенями обезьянок')

    def test_numeral_1_dependences(self):
        # Noun forms chosen by the number 1.
        cases = (
            (u'[[number||]] [[hero|number|им]]', u'1 обезьянка'),
            (u'[[number||]] [[hero|number|рд]]', u'1 обезьянки'),
            (u'[[number||]] [[hero|number|дт]]', u'1 обезьянке'),
        )
        for pattern, expected in cases:
            template = Template.create(morph, pattern)
            externals = {'hero': u'обезьянка', 'number': 1}
            self.assertEqual(
                template.substitute(self.dictionary, externals), expected)

    def test_numeral_2_dependences(self):
        # Noun forms chosen by the number 2.
        cases = (
            (u'[[number||]] [[hero|number|им]]', u'2 обезьянки'),
            (u'[[number||]] [[hero|number|рд]]', u'2 обезьянок'),
            (u'[[number||]] [[hero|number|дт]]', u'2 обезьянкам'),
        )
        for pattern, expected in cases:
            template = Template.create(morph, pattern)
            externals = {'hero': u'обезьянка', 'number': 2}
            self.assertEqual(
                template.substitute(self.dictionary, externals), expected)

    def test_numeral_13_adj_dependences(self):
        # Adjective form chosen by the number 13.
        template = Template.create(morph, u'[[number||]] [{целый|number|}]')
        rendered = template.substitute(self.dictionary, {'number': 13})
        self.assertEqual(rendered, u'13 целых')

    def test_numeral_5_dependences(self):
        # Noun forms chosen by the number 5.
        cases = (
            (u'[[number||]] [[hero|number|им]]', u'5 обезьянок'),
            (u'[[number||]] [[hero|number|рд]]', u'5 обезьянок'),
            (u'[[number||]] [[hero|number|дт]]', u'5 обезьянкам'),
        )
        for pattern, expected in cases:
            template = Template.create(morph, pattern)
            externals = {'hero': u'обезьянка', 'number': 5}
            self.assertEqual(
                template.substitute(self.dictionary, externals), expected)

    def test_dependences(self):
        # Adjective and verb forms, with and without a dependency on ``hero``.
        cases = (
            (u'[{глупый|hero|рд}] [[hero|рд]]', u'глупой обезьянки'),
            (u'враг [{ударил|hero|буд,3л}] [[hero|вн]]',
             u'враг ударит обезьянку'),
            (u'крыса [{ударил|прш,жр}] [[hero|вн]]',
             u'крыса ударила обезьянку'),
        )
        for pattern, expected in cases:
            template = Template.create(morph, pattern)
            externals = {'hero': u'обезьянка'}
            self.assertEqual(
                template.substitute(self.dictionary, externals), expected)

    def test_fake_substitutions(self):
        # The Fake value is expected verbatim in the output.
        template = Template.create(morph, u'[{глупый|hero|рд}] [[hero|рд]]')
        externals = {'hero': Fake(u'19x5')}
        rendered = template.substitute(self.dictionary, externals)
        self.assertEqual(rendered, u'глупого 19x5')

    def test_upper_case(self):
        # Capitalisation requested by the template and by the passed tuple.
        pattern = u'Первое предложение. [{подставил|hero|прш,загл}] слово в начало, а затем вставим имя от [[hero|рд]]. [[shadow|загл]] пришла.'
        expected = u'Первое предложение. Подставила слово в начало, а затем вставим имя от Обезьянки. Тень пришла.'

        template = Template.create(morph, pattern)
        result = template.substitute(self.dictionary, {
            'hero': (u'обезьянка', u'загл'),
            'shadow': u'тень'
        })
        self.assertEqual(result, expected)
Exemplo n.º 10
0
    def test_serialization(self):
        # Save a populated Dictionary to a temporary storage file, load it
        # into a new Dictionary and check the words are still available.
        import os

        # delete=False: the file must outlive the handle so it can be passed
        # to save()/load() by name. The handle is closed straight away so no
        # open descriptor is held during the round trip.
        with tempfile.NamedTemporaryFile(delete=False) as f:
            storage_path = f.name

        try:
            saved = Dictionary()
            for word in (self.monkey, self.silly, self.hit):
                saved.add_word(word)
            saved.save(storage=storage_path)

            dictionary = Dictionary()
            dictionary.load(storage=storage_path)

            self.assertEqual(
                dictionary.get_word(u'обезьянка').normalized, u'обезьянка')
            self.assertEqual(
                dictionary.get_word(u'глупый').normalized, u'глупый')
            self.assertEqual(
                dictionary.get_word(u'ударил').normalized, u'ударил')
        finally:
            # Nothing else deletes a delete=False temp file; without this
            # every test run leaves one behind.
            if os.path.exists(storage_path):
                os.remove(storage_path)