def test_parse_xml(self):
    """Parse the XML fixture at ``self.XML_PATH`` and check the parsed result.

    Verifies the version and revision strings, the first link and the
    number of links, the second grammeme and the number of grammemes,
    and the forms stored for lexeme ``'14'``.
    """
    parsed = parse_opencorpora_xml(self.XML_PATH)

    # Dictionary header metadata.
    assert parsed.version == '0.92'
    assert parsed.revision == '389440'

    # Links between lexemes.
    assert parsed.links[0] == ('5', '6', '1')
    assert len(parsed.links) == 12

    # Grammeme definitions.
    assert parsed.grammemes[1] == (
        'NOUN', 'POST', 'СУЩ', 'имя существительное',
    )
    assert len(parsed.grammemes) == 111

    # Lexemes are looked up by their string id.
    assert parsed.lexemes['14'] == [('ёжиться', 'INFN,impf,intr')]
def test_parse_xml(self):
    """Parse the XML fixture at ``self.XML_PATH`` and check the parsed result.

    Verifies the version and revision strings, the first link and the
    number of links, the second grammeme and the number of grammemes,
    and the forms stored for lexeme ``'14'``.
    """
    parsed = parse_opencorpora_xml(self.XML_PATH)

    # Dictionary header metadata.
    assert parsed.version == '0.92'
    assert parsed.revision == '389440'

    # Links between lexemes.
    assert parsed.links[0] == ('5', '6', '1')
    assert len(parsed.links) == 12

    # Grammeme definitions.
    assert parsed.grammemes[1] == (
        'NOUN', 'POST', 'СУЩ', 'имя существительное',
    )
    assert len(parsed.grammemes) == 111

    # Lexemes are looked up by their string id.
    assert parsed.lexemes['14'] == [('ёжиться', 'INFN,impf,intr')]
def make_test_suite(opencorpora_dict_path, out_path, word_limit=100):
    """
    Build a test suite from an OpenCorpora .xml dictionary and save it.

    The suite is meant to contain at least ``word_limit`` words for each
    distinct grammatical tag.

    :param opencorpora_dict_path: path handed to ``parse_opencorpora_xml``.
    :param out_path: destination handed to ``_save_test_suite``.
    :param word_limit: per-tag word limit handed to ``_get_test_suite``.
    """
    logger.debug('loading dictionary to memory...')
    dictionary = parse_opencorpora_xml(opencorpora_dict_path)

    logger.debug('preparing...')
    word_parses = _get_word_parses(dictionary.lexemes)
    logger.debug('dictionary size: %d', len(word_parses))

    logger.debug('handling umlauts...')
    # The size is logged again so the effect of ``_add_ee_parses`` is visible.
    word_parses = _add_ee_parses(word_parses)
    logger.debug('dictionary size: %d', len(word_parses))

    logger.debug('building test suite...')
    test_suite = _get_test_suite(word_parses, word_limit)
    logger.debug('test suite size: %d', len(test_suite))

    logger.debug('saving...')
    # The dictionary revision is passed to the saver along with the suite.
    _save_test_suite(out_path, test_suite, dictionary.revision)
def make_test_suite(opencorpora_dict_path, out_path, word_limit=100):
    """
    Build a test suite from an OpenCorpora .xml dictionary and save it.

    The suite is meant to contain at least ``word_limit`` words for each
    distinct grammatical tag.

    :param opencorpora_dict_path: path handed to ``parse_opencorpora_xml``.
    :param out_path: destination handed to ``_save_test_suite``.
    :param word_limit: per-tag word limit handed to ``_get_test_suite``.
    """
    logger.debug('loading dictionary to memory...')
    dictionary = parse_opencorpora_xml(opencorpora_dict_path)

    logger.debug('preparing...')
    word_parses = _get_word_parses(dictionary.lexemes)
    logger.debug('dictionary size: %d', len(word_parses))

    logger.debug('handling umlauts...')
    # The size is logged again so the effect of ``_add_ee_parses`` is visible.
    word_parses = _add_ee_parses(word_parses)
    logger.debug('dictionary size: %d', len(word_parses))

    logger.debug('building test suite...')
    test_suite = _get_test_suite(word_parses, word_limit)
    logger.debug('test suite size: %d', len(test_suite))

    logger.debug('saving...')
    # The dictionary revision is passed to the saver along with the suite.
    _save_test_suite(out_path, test_suite, dictionary.revision)