def test_render_token_with_punctuation(self):
    """A punctuation-only token renders as a plain text node, text unchanged.

    NOTE(review): the token is tagged TOKEN_SPACE even though it holds
    punctuation — presumably the tokenizer lumps punctuation in with
    whitespace; confirm against the tokenizer module.
    """
    text = "')."
    tok = self._maketokendict(token=text, tokentype=tokenizer.TOKEN_SPACE)
    result = htmlgenerator.render_token(tok)
    # No spaces in the input, so the rendered text is exactly the input.
    self.assertEqual(htmlgenerator.TEXT_NODE, result['node_type'])
    self.assertEqual(text, result['text'])
def test_render_token_with_multiple_spaces(self):
    """A run of spaces renders as a text node with each space doubled to two NBSPs."""
    text = 3 * " "
    # Every ASCII space becomes a pair of U+00A0 non-breaking spaces.
    expected = text.replace(" ", "\u00A0\u00A0")
    tok = self._maketokendict(token=text, tokentype=tokenizer.TOKEN_SPACE)
    result = htmlgenerator.render_token(tok)
    self.assertEqual(htmlgenerator.TEXT_NODE, result['node_type'])
    self.assertEqual(expected, result['text'])
def test_render_token_english_word(self):
    """An English word token renders as a <span class="word"> element wrapping the text."""
    text = "hypothetical"
    tok = self._maketokendict(token=text, tokentype=tokenizer.TOKEN_WORD)
    result = htmlgenerator.render_token(tok)
    self.assertEqual(htmlgenerator.ELEMENT_NODE, result['node_type'])
    element = result['element']
    self.assertEqual("span", element.tag)
    self.assertEqual({"class": "word"}, element.attrib)
    self.assertEqual(text, element.text)
def test_render_token_russian_word(self):
    """A recognized Russian word renders as a <span> carrying level and form-id data attributes."""
    text = "первоку́рсник"
    tok = self._maketokendict(
        token=text,
        tokentype=tokenizer.TOKEN_RUS,
        level="3A",
        form_ids=["174128"],
    )
    result = htmlgenerator.render_token(tok)
    self.assertEqual(htmlgenerator.ELEMENT_NODE, result['node_type'])
    element = result['element']
    self.assertEqual("span", element.tag)
    # The class string encodes only the numeric part of the level ("3A" -> "level3");
    # form ids are joined into a single comma-separated attribute value.
    expected_attrs = {
        "class": "word parsed level3",
        "data-form-ids": ",".join(tok['form_ids']),
        "data-level": tok['level'],
    }
    self.assertEqual(expected_attrs, element.attrib)
    self.assertEqual(text, element.text)