def test_HTMLbis(self):
    """Text wrapped entirely in one <em> element yields a single text unit."""
    data = '<em>J. David</em>'
    result = [((TEXT, u'J. David'),)]
    # Only the segment of each (segment, context, offset) triple is compared.
    segments = [seg for seg, _context, _offset in get_units(HTMLParser(data))]
    self.assertEqual(segments, result)
def test_surrounding_format(self):
    """A sentence enclosed by a single <em> pair is expected as bare text."""
    data = '<em>Surrounding format elements should be extracted !</em>'
    expected = [
        ((TEXT, u'Surrounding format elements should be extracted !'),),
    ]
    # Keep only the segment part of every unit produced by the parser.
    segments = [seg for seg, _context, _offset in get_units(HTMLParser(data))]
    self.assertEqual(segments, expected)
def test_HTML3(self):
    """Markup-free text with dashes and asterisks stays one text unit."""
    data = '-- toto is here -- *I am*'
    result = [((TEXT, u'-- toto is here -- *I am*'),)]
    # Only the segment of each (segment, context, offset) triple is compared.
    segments = [seg for seg, _context, _offset in get_units(HTMLParser(data))]
    self.assertEqual(segments, result)
def test_ignore_tags(self):
    """An inline <em> inside a sentence is reported as format markers.

    The first expected unit holds the text, the START_FORMAT/END_FORMAT
    pair numbered 1 and the emphasised text; the trailing sentence is
    expected as a second, separate unit.
    """
    data = 'Hello <em> Baby.</em> How are you ?'
    first_unit = (
        (TEXT, u'Hello '),
        (START_FORMAT, 1),
        (TEXT, u' Baby.'),
        (END_FORMAT, 1),
    )
    second_unit = ((TEXT, u'How are you ?'),)
    expected = [first_unit, second_unit]
    segments = [seg for seg, _context, _offset in get_units(HTMLParser(data))]
    self.assertEqual(segments, expected)
def test_HTML4(self):
    """A leading anchor followed by two sentences yields two units.

    The anchor and the first sentence are expected together in the first
    unit; the last word is expected alone in the second one.
    """
    data = ' <a href="http://www.debian.org/"> Debian </a> Hello. Toto'
    first_unit = (
        (START_FORMAT, 1),
        (TEXT, u' Debian '),
        (END_FORMAT, 1),
        (TEXT, u' Hello.'),
    )
    second_unit = ((TEXT, u'Toto'),)
    result = [first_unit, second_unit]
    segments = [seg for seg, _context, _offset in get_units(HTMLParser(data))]
    self.assertEqual(segments, result)
def test_HTML(self):
    """An anchor plus trailing text is expected as one single unit.

    The href value contains '; t. ffff'; the expected result still holds
    exactly one unit made of the format markers and the two text runs.
    """
    data = '<a href="; t. ffff">hello </a> GOGO'
    only_unit = (
        (START_FORMAT, 1),
        (TEXT, u'hello '),
        (END_FORMAT, 1),
        (TEXT, u' GOGO'),
    )
    result = [only_unit]
    segments = [seg for seg, _context, _offset in get_units(HTMLParser(data))]
    self.assertEqual(segments, result)
def test_paragraph(self):
    """A single text:p paragraph yields one unit holding its text."""
    body = ('<office:text>'
            '<text:p text:style-name="Standard">'
            'hello world'
            '</text:p>'
            '</office:text>')
    # Wrap the body in the shared ODT document template before parsing.
    events = XMLParser(odt_template % body)
    segments = []
    for unit in get_units(events):
        # The first item of each unit is the segment being checked.
        segments.append(unit[0])
    self.assertEqual(segments, [((TEXT, u'hello world'),)])
def test_paragraph(self):
    """Check that one text:p element produces exactly one text unit."""
    paragraph = ('<office:text>'
                 '<text:p text:style-name="Standard">'
                 'hello world'
                 '</text:p>'
                 '</office:text>')
    document = odt_template % paragraph
    parsed = XMLParser(document)
    extracted = []
    for unit in get_units(parsed):
        # Keep only the segment, i.e. the first item of the unit.
        extracted.append(unit[0])
    expected = [((TEXT, u'hello world'),)]
    self.assertEqual(extracted, expected)
def test_table(self):
    """Each cell paragraph of a 2x3 table yields its own unit, in order.

    The six text:p elements contain the letters A to F; the expected
    result lists one single-text unit per letter in document order.
    """
    table = """ <office:text> <table:table table:name="Tableau1" table:style-name="Tableau1"> <table:table-column table:style-name="Tableau1.A" table:number-columns-repeated="3"/> <table:table-row> <table:table-cell table:style-name="Tableau1.A1" office:value-type="string"> <text:p text:style-name="Table_20_Contents">A</text:p> </table:table-cell> <table:table-cell table:style-name="Tableau1.A1" office:value-type="string"> <text:p text:style-name="Table_20_Contents">B</text:p> </table:table-cell> <table:table-cell table:style-name="Tableau1.C1" office:value-type="string"> <text:p text:style-name="Table_20_Contents">C</text:p> </table:table-cell> </table:table-row> <table:table-row> <table:table-cell table:style-name="Tableau1.A2" office:value-type="string"> <text:p text:style-name="Table_20_Contents">D</text:p> </table:table-cell> <table:table-cell table:style-name="Tableau1.A2" office:value-type="string"> <text:p text:style-name="Table_20_Contents">E</text:p> </table:table-cell> <table:table-cell table:style-name="Tableau1.C2" office:value-type="string"> <text:p text:style-name="Table_20_Contents">F</text:p> </table:table-cell> </table:table-row> </table:table> </office:text> """
    # Wrap the table in the shared ODT document template before parsing.
    events = XMLParser(odt_template % table)
    cells = []
    for unit in get_units(events):
        # The first item of each unit is the segment being checked.
        cells.append(unit[0])
    expected = [
        ((TEXT, u'A'),),
        ((TEXT, u'B'),),
        ((TEXT, u'C'),),
        ((TEXT, u'D'),),
        ((TEXT, u'E'),),
        ((TEXT, u'F'),),
    ]
    self.assertEqual(cells, expected)
def test_iter_segmentation(self):
    """Several sentences inside one <span> come out as separate units.

    The whole message is surrounded by a format element and contains
    four sentences; each one is expected as its own text-only unit.
    """
    data = ('<span>This text contains many sentences. A sentence. '
            'Another one. This text must be well segmented. </span>')
    expected = [
        ((TEXT, u'This text contains many sentences.'),),
        ((TEXT, u'A sentence.'),),
        ((TEXT, u'Another one.'),),
        ((TEXT, u'This text must be well segmented.'),),
    ]
    segments = [seg for seg, _context, _offset in get_units(HTMLParser(data))]
    self.assertEqual(segments, expected)
def test_translation_paragraph(self):
    """Translating a paragraph replaces its text with the PO msgstr."""
    catalog = POFile(string='msgctxt "paragraph"\n'
                            'msgid "hello world"\n'
                            'msgstr "hola mundo"\n')
    body = ('<office:text>'
            '<text:p text:style-name="Standard">'
            'hello world'
            '</text:p>'
            '</office:text>')
    events = XMLParser(odt_template % body)
    # Run the parsed events through the catalog, then extract the units.
    translated = translate(events, catalog)
    segments = []
    for unit in get_units(translated):
        segments.append(unit[0])
    self.assertEqual(segments, [((TEXT, u'hola mundo'),)])
def test_translation_paragraph(self):
    """Check that the paragraph text is replaced by its PO translation."""
    po = POFile(string='msgctxt "paragraph"\n'
                       'msgid "hello world"\n'
                       'msgstr "hola mundo"\n')
    paragraph = ('<office:text>'
                 '<text:p text:style-name="Standard">'
                 'hello world'
                 '</text:p>'
                 '</office:text>')
    document = odt_template % paragraph
    parsed = XMLParser(document)
    translated = translate(parsed, po)
    extracted = []
    for unit in get_units(translated):
        # Keep only the segment, i.e. the first item of the unit.
        extracted.append(unit[0])
    expected = [((TEXT, u'hola mundo'),)]
    self.assertEqual(extracted, expected)
def get_units(self, srx_handler=None):
    """Yield the units extracted from ``self.events``, minus solo matches.

    Each unit is a ``(source, context, line)`` triple produced by the
    module-level ``get_units``. A unit is dropped when its source holds
    at most one item and the second element of that item matches
    ``subs_expr_solo``; every other unit is yielded unchanged.

    :param srx_handler: forwarded as-is to the module-level ``get_units``.
    """
    for unit in get_units(self.events, srx_handler):
        source, context, line = unit
        # The length test comes first so the pattern is only tried on
        # sources that do not have more than one item.
        if len(source) <= 1 and subs_expr_solo.match(source[0][1]) is not None:
            continue
        yield source, context, line
def get_units(self, srx_handler=None):
    """Yield the units of 'content.xml', 'meta.xml' and 'styles.xml'.

    The three parts are processed in that order: the events of each part
    are obtained with ``self.get_events`` and handed to the module-level
    ``get_units``, whose results are yielded unchanged.

    :param srx_handler: forwarded as-is to the module-level ``get_units``.
    """
    parts = ['content.xml', 'meta.xml', 'styles.xml']
    for part in parts:
        for unit in get_units(self.get_events(part), srx_handler):
            # FIXME: the line number has no meaning here
            yield unit