def test_dependences(self):
    # Renders three templates that combine a `[{...}]` item with a
    # `[[hero|...]]` item and pins the exact text produced for one hero word.
    cases = (
        (u'[{глупый|hero|рд}] [[hero|рд]]', u'глупой обезьянки'),
        (u'враг [{ударил|hero|буд,3л}] [[hero|вн]]', u'враг ударит обезьянку'),
        (u'крыса [{ударил|прш,жр}] [[hero|вн]]', u'крыса ударила обезьянку'),
    )
    for source, expected in cases:
        template = Template.create(morph, source)
        # Build a fresh externals dict for every case so that no state can
        # leak between substitutions.
        rendered = template.substitute(self.dictionary, {'hero': u'обезьянка'})
        self.assertEqual(rendered, expected)
def test_externals(self):
    # The same external word is rendered through two templates that differ
    # only in the property list attached to `hero`.
    cases = (
        (u'ударить [[hero|вн]]', u'ударить обезьянку'),
        (u'ударить [[hero|вн,мн]]', u'ударить обезьянок'),
    )
    for source, expected in cases:
        template = Template.create(morph, source)
        rendered = template.substitute(self.dictionary, {'hero': u'обезьянка'})
        self.assertEqual(rendered, expected)
def test_numeral_5_dependences(self):
    # `hero` is declared as depending on `number` (= 5); each case uses a
    # different last property in the `hero` item and pins the rendered text.
    cases = (
        (u'[[number||]] [[hero|number|им]]', u'5 обезьянок'),
        (u'[[number||]] [[hero|number|рд]]', u'5 обезьянок'),
        (u'[[number||]] [[hero|number|дт]]', u'5 обезьянкам'),
    )
    for source, expected in cases:
        template = Template.create(morph, source)
        # Fresh externals per case, so nothing is shared between substitutions.
        externals = {'hero': u'обезьянка', 'number': 5}
        self.assertEqual(template.substitute(self.dictionary, externals), expected)
def test_noun_dependences(self):
    # `shadow` is declared as depending on `hero`, and `hero` is supplied as a
    # (word, properties) tuple; the expected text pins the resulting forms.
    template = Template.create(morph, u'[[shadow|hero|тв]] [[hero|рд]]')
    externals = {'hero': (u'обезьянка', u'мн'), 'shadow': u'тень'}
    rendered = template.substitute(self.dictionary, externals)
    self.assertEqual(rendered, u'тенями обезьянок')
def test_dependences(self):
    # Each assertion builds a template and substitutes the same single
    # external (`hero`); only the template source and expected text vary.
    def render(source):
        compiled = Template.create(morph, source)
        return compiled.substitute(self.dictionary, {'hero': u'обезьянка'})

    self.assertEqual(render(u'[{глупый|hero|рд}] [[hero|рд]]'),
                     u'глупой обезьянки')
    self.assertEqual(render(u'враг [{ударил|hero|буд,3л}] [[hero|вн]]'),
                     u'враг ударит обезьянку')
    self.assertEqual(render(u'крыса [{ударил|прш,жр}] [[hero|вн]]'),
                     u'крыса ударила обезьянку')
def test_numeral_5_dependences(self):
    # All three checks substitute hero + number = 5; the local helper keeps
    # the externals identical and freshly built for every call.
    def render(source):
        compiled = Template.create(morph, source)
        return compiled.substitute(self.dictionary,
                                   {'hero': u'обезьянка', 'number': 5})

    self.assertEqual(render(u'[[number||]] [[hero|number|им]]'), u'5 обезьянок')
    self.assertEqual(render(u'[[number||]] [[hero|number|рд]]'), u'5 обезьянок')
    self.assertEqual(render(u'[[number||]] [[hero|number|дт]]'), u'5 обезьянкам')
def test_upper_case(self):
    # The `загл` property appears in three places: on a `[{...}]` item, on an
    # `[[...]]` item, and inside the tuple passed for `hero`. The expected
    # text pins the resulting capitalisation.
    source = (u'Первое предложение. [{подставил|hero|прш,загл}] слово в начало, '
              u'а затем вставим имя от [[hero|рд]]. [[shadow|загл]] пришла.')
    expected = (u'Первое предложение. Подставила слово в начало, '
                u'а затем вставим имя от Обезьянки. Тень пришла.')

    template = Template.create(morph, source)
    externals = {'hero': (u'обезьянка', u'загл'), 'shadow': u'тень'}

    self.assertEqual(template.substitute(self.dictionary, externals), expected)
def test_noun_dependences(self):
    # One template, two externals: `hero` is given as a (word, properties)
    # tuple and `shadow` as a plain word.
    source = u'[[shadow|hero|тв]] [[hero|рд]]'
    rendered = Template.create(morph, source).substitute(
        self.dictionary,
        {'hero': (u'обезьянка', u'мн'), 'shadow': u'тень'})
    self.assertEqual(rendered, u'тенями обезьянок')
def test_numeral_13_adj_dependences(self):
    # A `[{...}]` item that depends on the numeric external `number` (= 13).
    template = Template.create(morph, u'[[number||]] [{целый|number|}]')
    externals = {'number': 13}
    rendered = template.substitute(self.dictionary, externals)
    self.assertEqual(rendered, u'13 целых')
def test_partial_dependence(self):
    # The template lists only `вн` for `hero`; the `мн` property arrives with
    # the external value itself, passed as a (word, properties) tuple.
    template = Template.create(morph, u'ударить [[hero|вн]]')
    externals = {'hero': (u'обезьянка', u'мн')}
    rendered = template.substitute(self.dictionary, externals)
    self.assertEqual(rendered, u'ударить обезьянок')
def test_internals(self):
    # A `[{...}]` item declared as depending on `hero`, with `hero` supplied
    # as a (word, properties) tuple.
    template = Template.create(morph, u'[{тень|hero|тв}] [[hero|рд]]')
    externals = {'hero': (u'обезьянка', u'мн')}
    rendered = template.substitute(self.dictionary, externals)
    self.assertEqual(rendered, u'тенями обезьянок')
def setUp(self):
    # Pre-build the three templates stored on the test case as t1, t2 and t3.
    # They are created and assigned one at a time, in this order.
    build = Template.create

    accusative_source = u'ударить [[hero|вн]]'
    accusative_plural_source = u'ударить [[hero|вн,мн]]'
    internal_source = u'[{тенью|hero|тв}] [[hero|рд]]'

    self.t1 = build(morph, accusative_source)
    self.t2 = build(morph, accusative_plural_source)
    self.t3 = build(morph, internal_source)
def test_externals(self):
    # Both checks substitute the same `hero` word; only the property list in
    # the template changes (`вн` versus `вн,мн`).
    def render(source):
        compiled = Template.create(morph, source)
        return compiled.substitute(self.dictionary, {'hero': u'обезьянка'})

    self.assertEqual(render(u'ударить [[hero|вн]]'), u'ударить обезьянку')
    self.assertEqual(render(u'ударить [[hero|вн,мн]]'), u'ударить обезьянок')
def test_partial_dependence(self):
    # `hero` is passed as a (word, properties) tuple carrying `мн`, while the
    # template itself only names `вн`.
    expected = u'ударить обезьянок'
    rendered = Template.create(morph, u'ударить [[hero|вн]]').substitute(
        self.dictionary, {'hero': (u'обезьянка', u'мн')})
    self.assertEqual(rendered, expected)
def test_fake_substitutions(self):
    # `hero` is supplied as a Fake wrapper around a literal string; the
    # expected text shows that string appearing unchanged in the output.
    template = Template.create(morph, u'[{глупый|hero|рд}] [[hero|рд]]')
    externals = {'hero': Fake(u'19x5')}
    rendered = template.substitute(self.dictionary, externals)
    self.assertEqual(rendered, u'глупого 19x5')
def test_internals(self):
    # Renders a `[{...}]` item that names `hero` as its dependence; `hero`
    # comes in as a (word, properties) tuple.
    expected = u'тенями обезьянок'
    rendered = Template.create(morph, u'[{тень|hero|тв}] [[hero|рд]]').substitute(
        self.dictionary, {'hero': (u'обезьянка', u'мн')})
    self.assertEqual(rendered, expected)
def test_fake_substitutions(self):
    # Substitutes a Fake-wrapped literal for `hero` and pins the rendered text.
    expected = u'глупого 19x5'
    rendered = Template.create(morph, u'[{глупый|hero|рд}] [[hero|рд]]').substitute(
        self.dictionary, {'hero': Fake(u'19x5')})
    self.assertEqual(rendered, expected)
def test_numeral_13_adj_dependences(self):
    # The only external is the number 13; the `[{...}]` item names `number`
    # as its dependence.
    expected = u'13 целых'
    rendered = Template.create(morph, u'[[number||]] [{целый|number|}]').substitute(
        self.dictionary, {'number': 13})
    self.assertEqual(rendered, expected)
def _render_mismatch_message(template_phrase, rendered, expected):
    # Describe how a rendered test phrase differs from its expected text:
    # either the first differing character, or the tails past the common prefix.
    common = min(len(rendered), len(expected))
    for position in range(common):
        if rendered[position] != expected[position]:
            return ''' wrong test_render for phrase "%s" prefix: "%s" diff: %s|%s''' % (
                template_phrase, rendered[:position], rendered[position], expected[position])
    # One string is a strict prefix of the other. Slice at the common length,
    # not at a leftover loop index: that index is one short, and it does not
    # exist at all when either string is empty.
    return 'different len: "%s"|"%s"' % (rendered[common:], expected[common:])


def import_texts(morph, source_dir, tech_vocabulary_path, voc_storage, dict_storage, tmp_dir='/tmp', check=False):
    """Load phrase templates from the ``*.json`` files in ``source_dir``.

    For every JSON file the function validates its structure, registers each
    phrase type in a Vocabulary, adds the words used by the templates to a
    Dictionary, and verifies every template by rendering it with the file's
    variables and comparing the result with the test phrase stored next to it.

    JSON keys read by this function: ``prefix`` (must equal the file name
    without ``.json``), ``types`` (maps a non-empty suffix either to a list of
    phrases or to a dict with ``phrases`` and optional ``variables``),
    ``variables_verbose`` and optional ``variables``. Each phrase is a
    ``(template_phrase, test_phrase)`` pair.

    Args:
        morph: passed through to ``WordBase.create_from_string`` and
            ``Template.create``.
        source_dir: directory scanned for ``*.json`` files.
        tech_vocabulary_path: passed to ``get_tech_vocabulary``.
        voc_storage: vocabulary storage; loaded if it exists (only when
            ``check`` is false) and saved at the end (only when ``check`` is
            false).
        dict_storage: dictionary storage; loaded if it exists and saved at
            the end (only when ``check`` is false).
        tmp_dir: directory for the ``textgen-files-check-<filename>`` marker
            files used in check mode.
        check: when true, nothing is saved; a file whose marker is newer than
            the file itself is skipped, and a marker is written after a file
            has been processed without errors.

    Returns:
        ``{'modules': {prefix: get_user_data_for_module(data), ...}}`` with
        one entry per processed file.

    Raises:
        Exception: file name differs from ``prefix``, a type suffix is the
            empty string, or a variable has no entry in ``variables_verbose``.
        TextgenException: a rendered template differs from its test phrase.
    """
    from textgen.templates import Dictionary, Vocabulary, Template
    from textgen.words import WordBase

    vocabulary = Vocabulary()
    user_data = {'modules': {}}

    if not check and os.path.exists(voc_storage):
        vocabulary.load(storage=voc_storage)

    dictionary = Dictionary()
    if os.path.exists(dict_storage):
        dictionary.load(storage=dict_storage)

    tech_vocabulary = get_tech_vocabulary(tech_vocabulary_path)

    for raw_word in tech_vocabulary.keys():
        dictionary.add_word(WordBase.create_from_string(morph, raw_word.strip(), tech_vocabulary))

    for filename in os.listdir(source_dir):
        if not filename.endswith('.json'):
            continue

        texts_path = os.path.join(source_dir, filename)
        if not os.path.isfile(texts_path):
            continue

        group = filename[:-5]  # strip the '.json' extension

        if check:
            check_path = os.path.join(tmp_dir, 'textgen-files-check-' + filename)
            if os.path.exists(check_path) and os.path.getmtime(check_path) > os.path.getmtime(texts_path):
                # Parenthesised single-argument form prints the same text
                # whether `print` is a statement or a function.
                print('group "%s" has been already processed' % group)
                continue

        print('load "%s"' % group)

        # Only the parsing needs the file handle; close it before processing.
        with open(texts_path) as f:
            data = json.load(f)

        if group != data['prefix']:
            raise Exception('filename MUST be equal to prefix')

        for suffix in data['types']:
            if suffix == '':
                raise Exception('type MUST be not equal to empty string')

        user_data['modules'][data['prefix']] = get_user_data_for_module(data)

        variables_verbose = data['variables_verbose']
        global_variables = data.get('variables', {})

        for variable_name in global_variables.keys():
            if not variables_verbose.get(variable_name):
                raise Exception('no verbose name for variable "%s"' % variable_name)

        for suffix, type_ in data['types'].items():
            phrase_key = '%s_%s' % (group, suffix)
            vocabulary.register_type(phrase_key)

            # A type is either a bare list of phrases or a dict that may also
            # carry type-local variables.
            if isinstance(type_, list):
                phrases = type_
                local_variables = {}
            else:
                phrases = type_['phrases']
                local_variables = type_.get('variables', {})

            for variable_name in local_variables.keys():
                if not variables_verbose.get(variable_name):
                    raise Exception('no verbose name for variable "%s"' % variable_name)

            # Local variables override global ones with the same name.
            variables = copy.copy(global_variables)
            variables.update(local_variables)

            for template_phrase, test_phrase in phrases:
                template = Template.create(morph,
                                           template_phrase,
                                           available_externals=variables.keys(),
                                           tech_vocabulary=tech_vocabulary)
                vocabulary.add_phrase(phrase_key, template)

                for value in variables.values():
                    if isinstance(value, numbers.Number):
                        continue  # numeric variables are not added as words
                    dictionary.add_word(WordBase.create_from_string(morph, value, tech_vocabulary))

                for string in template.get_internal_words():
                    dictionary.add_word(WordBase.create_from_string(morph, string, tech_vocabulary))

                test_result_normalized = efication(template.substitute(dictionary, variables))
                test_phrase_normalized = efication(test_phrase)

                if test_result_normalized != test_phrase_normalized:
                    raise TextgenException(_render_mismatch_message(template_phrase,
                                                                    test_result_normalized,
                                                                    test_phrase_normalized))

        if check:
            # Marker file: its mtime is compared with the source file's mtime
            # on the next check run.
            with open(check_path, 'w') as f:
                f.write('1')

    if not check:
        vocabulary.save(storage=voc_storage)
        dictionary.save(storage=dict_storage)

    return user_data
def import_texts(morph, source_dir, tech_vocabulary_path, voc_storage, dict_storage, tmp_dir='/tmp', check=False):
    """Import and verify phrase templates stored as JSON files in ``source_dir``.

    Every ``*.json`` file is validated, its phrase types are registered in a
    Vocabulary, the words its templates need are added to a Dictionary, and
    each template is rendered with the file's variables and compared with the
    test phrase that accompanies it.

    JSON keys read here: ``prefix`` (must match the file name without the
    ``.json`` extension), ``types`` (suffix -> list of phrases, or a dict with
    ``phrases`` and optional ``variables``), ``variables_verbose`` and the
    optional ``variables``. A phrase is a ``(template_phrase, test_phrase)``
    pair.

    Args:
        morph: forwarded to ``WordBase.create_from_string`` and
            ``Template.create``.
        source_dir: directory whose ``*.json`` files are processed.
        tech_vocabulary_path: forwarded to ``get_tech_vocabulary``.
        voc_storage: vocabulary storage, loaded when it exists and saved at
            the end; both only happen when ``check`` is false.
        dict_storage: dictionary storage, loaded when it exists; saved at the
            end only when ``check`` is false.
        tmp_dir: where check-mode marker files
            (``textgen-files-check-<filename>``) are kept.
        check: check-only mode. Nothing is saved, files with a marker newer
            than the file are skipped, and a marker is written for every file
            processed without errors.

    Returns:
        A dict ``{'modules': {...}}`` mapping each processed file's prefix to
        the value of ``get_user_data_for_module(data)``.

    Raises:
        Exception: prefix/file name mismatch, empty type suffix, or a variable
            without a verbose name.
        TextgenException: a rendered template does not match its test phrase.
    """
    from textgen.templates import Dictionary, Vocabulary, Template
    from textgen.words import WordBase

    def require_verbose_names(names, verbose_names):
        # Every declared variable must have a non-empty verbose name.
        for variable_name in names:
            if not verbose_names.get(variable_name):
                raise Exception('no verbose name for variable "%s"' % variable_name)

    vocabulary = Vocabulary()
    user_data = {'modules': {}}

    if not check:
        if os.path.exists(voc_storage):
            vocabulary.load(storage=voc_storage)

    dictionary = Dictionary()
    if os.path.exists(dict_storage):
        dictionary.load(storage=dict_storage)

    tech_vocabulary = get_tech_vocabulary(tech_vocabulary_path)

    for tech_word in tech_vocabulary.keys():
        word = WordBase.create_from_string(morph, tech_word.strip(), tech_vocabulary)
        dictionary.add_word(word)

    for filename in os.listdir(source_dir):
        texts_path = os.path.join(source_dir, filename)

        if not filename.endswith('.json') or not os.path.isfile(texts_path):
            continue

        group = filename[:-5]  # file name without the '.json' extension

        if check:
            check_path = os.path.join(tmp_dir, 'textgen-files-check-' + filename)
            marker_is_fresh = (os.path.exists(check_path) and
                               os.path.getmtime(check_path) > os.path.getmtime(texts_path))
            if marker_is_fresh:
                # Single parenthesised argument: same output whether `print`
                # is a statement or a function.
                print('group "%s" has been already processed' % group)
                continue

        print('load "%s"' % group)

        # The handle is needed only for parsing, so it is released right away.
        with open(texts_path) as f:
            data = json.load(f)

        if group != data['prefix']:
            raise Exception('filename MUST be equal to prefix')

        for suffix in data['types']:
            if suffix == '':
                raise Exception('type MUST be not equal to empty string')

        user_data['modules'][data['prefix']] = get_user_data_for_module(data)

        variables_verbose = data['variables_verbose']
        global_variables = data.get('variables', {})
        require_verbose_names(global_variables.keys(), variables_verbose)

        for suffix, type_ in data['types'].items():
            phrase_key = '%s_%s' % (group, suffix)
            vocabulary.register_type(phrase_key)

            # Either a plain list of phrases, or a dict with phrases plus
            # optional type-local variables.
            if isinstance(type_, list):
                phrases, local_variables = type_, {}
            else:
                phrases = type_['phrases']
                local_variables = type_.get('variables', {})

            require_verbose_names(local_variables.keys(), variables_verbose)

            # Type-local variables take precedence over file-level ones.
            variables = copy.copy(global_variables)
            variables.update(local_variables)

            for template_phrase, test_phrase in phrases:
                template = Template.create(morph,
                                           template_phrase,
                                           available_externals=variables.keys(),
                                           tech_vocabulary=tech_vocabulary)
                vocabulary.add_phrase(phrase_key, template)

                for value in variables.values():
                    if isinstance(value, numbers.Number):
                        continue  # numbers are substituted as-is, not as words
                    word = WordBase.create_from_string(morph, value, tech_vocabulary)
                    dictionary.add_word(word)

                for string in template.get_internal_words():
                    word = WordBase.create_from_string(morph, string, tech_vocabulary)
                    dictionary.add_word(word)

                test_result = template.substitute(dictionary, variables)
                test_result_normalized = efication(test_result)
                test_phrase_normalized = efication(test_phrase)

                if test_result_normalized == test_phrase_normalized:
                    continue

                common = min(len(test_result_normalized), len(test_phrase_normalized))
                mismatch_at = next((position for position in range(common)
                                    if test_result_normalized[position] != test_phrase_normalized[position]),
                                   None)

                if mismatch_at is None:
                    # One text is a strict prefix of the other. Slice at the
                    # common length: a leftover loop index would be one short
                    # and would be undefined when either text is empty.
                    msg = 'different len: "%s"|"%s"' % (test_result_normalized[common:],
                                                        test_phrase_normalized[common:])
                else:
                    msg = ''' wrong test_render for phrase "%s" prefix: "%s" diff: %s|%s''' % (
                        template_phrase,
                        test_result_normalized[:mismatch_at],
                        test_result_normalized[mismatch_at],
                        test_phrase_normalized[mismatch_at])

                raise TextgenException(msg)

        if check:
            # Touch the marker; its mtime is compared with the source file's
            # mtime on the next check run.
            with open(check_path, 'w') as f:
                f.write('1')

    if not check:
        vocabulary.save(storage=voc_storage)
        dictionary.save(storage=dict_storage)

    return user_data
def test_upper_case(self):
    # Checks capitalisation requested through the `загл` property, both from
    # inside the template and from the properties passed along with `hero`.
    template = Template.create(
        morph,
        u'Первое предложение. [{подставил|hero|прш,загл}] слово в начало, '
        u'а затем вставим имя от [[hero|рд]]. [[shadow|загл]] пришла.')

    rendered = template.substitute(
        self.dictionary,
        {'hero': (u'обезьянка', u'загл'), 'shadow': u'тень'})

    self.assertEqual(
        rendered,
        u'Первое предложение. Подставила слово в начало, '
        u'а затем вставим имя от Обезьянки. Тень пришла.')