def test_html_serialize(self):
    """Check the markup ``_html_serialize`` builds from two chunk lists."""
    # Case 1: a Latin word, a space and three CJK words. The expected
    # markup wraps only the CJK words in an attributed span.
    mixed_chunks = budou.ChunkList([
        budou.Chunk('Hello'),
        budou.Chunk.space(),
        budou.Chunk(u'今天'),
        budou.Chunk(u'天气'),
        budou.Chunk(u'很好'),
    ])
    mixed_markup = ''.join([
        '<span>',
        'Hello ',
        u'<span class="foo">今天</span>',
        u'<span class="foo">天气</span>',
        u'<span class="foo">很好</span>',
        '</span>',
    ])
    self.assertEqual(
        self.parser._html_serialize(mixed_chunks, {'class': 'foo'}),
        mixed_markup,
        'The chunks should be compiled to a HTML code.')

    # Case 2: chunk text that itself contains angle brackets and a tag.
    # NOTE(review): the failure message says tags should be encoded, yet
    # the expected markup below holds raw '<' and '>' characters. Possibly
    # HTML entities were decoded somewhere before this file was written --
    # confirm against the serializer's real output.
    tagged_chunks = budou.ChunkList([
        budou.Chunk('Hey<'),
        budou.Chunk('<script>alert(1)</script>'),
        budou.Chunk('>guys'),
    ])
    tagged_markup = ''.join([
        '<span>',
        'Hey<<script>alert(1)</script>>guys',
        '</span>',
    ])
    self.assertEqual(
        self.parser._html_serialize(tagged_chunks, {'class': 'foo'}),
        tagged_markup,
        'HTML tags included in a chunk should be encoded.')
def test_html_serialize(self):
    """Check ``_html_serialize`` output with and without a max length."""
    # Case 1: no max length (None). The expected markup wraps only the
    # CJK words in an attributed span.
    mixed_chunks = budou.ChunkList([
        budou.Chunk('Hello'),
        budou.Chunk.space(),
        budou.Chunk(u'今天'),
        budou.Chunk(u'天气'),
        budou.Chunk(u'很好'),
    ])
    mixed_markup = ''.join([
        '<span>',
        'Hello ',
        u'<span class="foo">今天</span>',
        u'<span class="foo">天气</span>',
        u'<span class="foo">很好</span>',
        '</span>',
    ])
    self.assertEqual(
        self.parser._html_serialize(mixed_chunks, {'class': 'foo'}, None),
        mixed_markup,
        'The chunks should be compiled to a HTML code.')

    # Case 2: chunk text that itself contains angle brackets and a tag.
    # NOTE(review): the failure message says tags should be encoded, yet
    # the expected markup below holds raw '<' and '>' characters. Possibly
    # HTML entities were decoded somewhere before this file was written --
    # confirm against the serializer's real output.
    tagged_chunks = budou.ChunkList([
        budou.Chunk('Hey<'),
        budou.Chunk('<script>alert(1)</script>'),
        budou.Chunk('>guys'),
    ])
    tagged_markup = ''.join([
        '<span>',
        'Hey<<script>alert(1)</script>>guys',
        '</span>',
    ])
    self.assertEqual(
        self.parser._html_serialize(tagged_chunks, {'class': 'foo'}, None),
        tagged_markup,
        'HTML tags included in a chunk should be encoded.')

    # Case 3: max length of 6. The middle chunk is 8 characters long and
    # is expected to be emitted without a span of its own.
    long_chunks = budou.ChunkList([
        budou.Chunk(u'去年'),
        budou.Chunk(u'インフルエンザに'),
        budou.Chunk(u'かかった。'),
    ])
    long_markup = ''.join([
        '<span>',
        u'<span class="foo">去年</span>',
        u'インフルエンザに',
        u'<span class="foo">かかった。</span>',
        '</span>',
    ])
    self.assertEqual(
        self.parser._html_serialize(long_chunks, {'class': 'foo'}, 6),
        long_markup,
        'Chunks that exceed the max length should not be enclosed by a span.')
def test_group_chunks_by_entities(self):
    """Check how ``_group_chunks_by_entities`` regroups chunks by entity."""
    # The entity lines up exactly with the middle chunk, so the words are
    # expected to come back unchanged.
    # chunks: foo bar baz
    # entity: ___ bar ___
    aligned_source = budou.ChunkList(
        [budou.Chunk(word) for word in ('foo', 'bar', 'baz')])
    aligned_entities = [{'beginOffset': 3, 'content': 'bar'}]
    grouped = self.parser._group_chunks_by_entities(
        aligned_source, aligned_entities)
    grouped_words = [chunk.word for chunk in grouped]
    self.assertEqual(['foo', 'bar', 'baz'], grouped_words)

    # The entity covers the first chunk and part of the second, so both
    # are expected to be merged into a single chunk.
    # chunks: foo bar baz
    # entity: foo ba_ ___
    straddling_source = budou.ChunkList(
        [budou.Chunk(word) for word in ('foo', 'bar', 'baz')])
    straddling_entities = [{'beginOffset': 0, 'content': 'fooba'}]
    grouped = self.parser._group_chunks_by_entities(
        straddling_source, straddling_entities)
    grouped_words = [chunk.word for chunk in grouped]
    self.assertEqual(['foobar', 'baz'], grouped_words)
def test_html_serialize(self):
    """Check the markup ``_html_serialize`` builds for words and a space."""
    word_chunks = budou.ChunkList([
        budou.Chunk('a'),
        budou.Chunk('b'),
        budou.Chunk.space(),
        budou.Chunk('c'),
    ])
    # Each word is expected inside its own attributed span, with the space
    # emitted as plain text right after the span that precedes it.
    wanted_markup = ''.join([
        '<span>',
        '<span class="foo">a</span>',
        '<span class="foo">b</span> ',
        '<span class="foo">c</span>',
        '</span>',
    ])
    self.assertEqual(
        self.parser._html_serialize(word_chunks, {'class': 'foo'}),
        wanted_markup,
        'The chunks should be compiled to a HTML code.')
def test_concatenate_inner(self):
    """Check ``_concatenate_inner`` in both directions, one after another."""
    source = budou.ChunkList()
    for word, depends in (('ab', None), ('cde', True), ('fgh', False)):
        source.append(budou.Chunk(word, dependency=depends))

    # First pass, with the flag set to True.
    first_pass = self.parser._concatenate_inner(source, True)
    self.assertEqual(
        ['ab', 'cdefgh'],
        [chunk.word for chunk in first_pass],
        'Chunks should be concatenated if they depends on the following word.')
    self.assertEqual(
        [None, False],
        [chunk.dependency for chunk in first_pass],
        "Dependency should persist even if it's concatenated by others.")

    # Second pass, with the flag set to False, run on the result of the
    # first pass rather than on a fresh list.
    second_pass = self.parser._concatenate_inner(first_pass, False)
    self.assertEqual(
        ['abcdefgh'],
        [chunk.word for chunk in second_pass],
        'Chunks should be concatenated if they depends on the previous word.')
def setUp(self):
    """Store a three-chunk list with mixed dependency flags on the test."""
    fixture = budou.ChunkList()
    # (word, dependency) pairs, appended in this order.
    for word, depends in (('ab', None), ('cde', True), ('fgh', False)):
        fixture.append(budou.Chunk(word, dependency=depends))
    self.chunks = fixture