def test_blocks(self):
    """Test that the right block element is used in the right context.

    The default test document must contain exactly one ``ab`` and no ``p``
    anywhere below ``text``.  The 'm3519' document, converted with the
    tpen text filter, must contain no ``ab`` and exactly two ``p`` as
    direct children of ``text/body``.
    """
    root = self.testdoc.getroot()
    text_tag = self.ns('text')
    ab_tag = self.ns('ab')
    p_tag = self.ns('p')

    # Descendant search: the block may sit at any depth below <text>.
    ab_anywhere = root.findall('.//%s//%s' % (text_tag, ab_tag))
    self.assertEqual(1, len(ab_anywhere))
    p_anywhere = root.findall('.//%s//%s' % (text_tag, p_tag))
    self.assertEqual(0, len(p_anywhere))

    d_json = helpers.load_JSON_file(self.testfiles['m3519'])
    d_root = from_sc(d_json,
                     special_chars=self.glyphs,
                     text_filter=helpers.tpen_filter)
    body_tag = self.ns('body')

    # Child search: only blocks directly under text/body are counted.
    ab_in_body = d_root.findall(
        './/%s/%s/%s' % (text_tag, body_tag, ab_tag))
    self.assertEqual(0, len(ab_in_body))
    p_in_body = d_root.findall(
        './/%s/%s/%s' % (text_tag, body_tag, p_tag))
    self.assertEqual(2, len(p_in_body))
def setUp(self):
    """Load the configuration and build the documents shared by the tests.

    Sets ``settings``, ``tei_ns``, ``xml_ns``, ``glyphs`` and ``testfiles``
    from the configuration, then converts the 'json' test file twice
    (without and with special characters) and the 'm3519' test file once.
    """
    settings = config()
    self.settings = settings

    namespaces = settings['namespaces']
    self.tei_ns = namespaces['tei']
    self.xml_ns = namespaces['xml']

    self.glyphs = helpers.glyph_struct(settings['armenian_glyphs'])
    self.testfiles = settings['testfiles']

    # The same parsed JSON feeds both the plain and the glyph-aware document.
    msdata = helpers.load_JSON_file(self.testfiles['json'])
    self.testdoc_noglyphs = from_sc(msdata)
    self.testdoc = from_sc(msdata, special_chars=self.glyphs)

    data3519 = helpers.load_JSON_file(self.testfiles['m3519'])
    self.doc3519 = from_sc(
        data3519,
        special_chars=self.glyphs,
        numeric_parser=helpers.armenian_numbers,
        text_filter=helpers.tpen_filter,
    )
def test_blocks(self):
    """Test that the right block element is used in the right context.

    Expects one ``ab`` and zero ``p`` at any depth under ``text`` in the
    default test document, and zero ``ab`` and two ``p`` directly under
    ``text/body`` in the filtered 'm3519' document.
    """
    def count(tree, pattern, *names):
        # Number of elements matching the path built from the given
        # local names, each qualified through self.ns.
        qualified = tuple(self.ns(name) for name in names)
        return len(tree.findall(pattern % qualified))

    root = self.testdoc.getroot()
    self.assertEqual(1, count(root, './/%s//%s', 'text', 'ab'))
    self.assertEqual(0, count(root, './/%s//%s', 'text', 'p'))

    d_json = helpers.load_JSON_file(self.testfiles['m3519'])
    d_root = from_sc(d_json,
                     special_chars=self.glyphs,
                     text_filter=helpers.tpen_filter)
    self.assertEqual(0, count(d_root, './/%s/%s/%s', 'text', 'body', 'ab'))
    self.assertEqual(2, count(d_root, './/%s/%s/%s', 'text', 'body', 'p'))
def test_postprocess(self):
    """Check that the postprocess hook is applied to the converted document.

    Converts the 'm3519' test file with ``helpers.postprocess`` and
    requires that at least one ``pb`` element exists and that every ``pb``
    element carries ``ana="interesting"``.
    """
    d_json = helpers.load_JSON_file(self.testfiles['m3519'])
    d_root = from_sc(d_json,
                     special_chars=self.glyphs,
                     text_filter=helpers.tpen_filter,
                     postprocess=helpers.postprocess)
    visited = False
    for tag in d_root.iter(self.ns('pb')):
        visited = True
        # assertEqual, not the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual('interesting', tag.get('ana'))
    # Guard against a vacuous pass when the document has no pb at all.
    self.assertTrue(visited)
def test_parse_error(self):
    """Check that a reasonable error message is returned from a JSON file
    that contains badly-formed XML.

    Converts the broken test data with ``short_error`` enabled while
    capturing standard error, then checks the headline message, the
    number of lines that follow it, and the first of those lines.
    """
    md = {'short_error': True}
    with io.StringIO() as buf, redirect_stderr(buf):
        # Only the text written to stderr matters; the return value is
        # deliberately ignored.
        from_sc(self.brokendata, md)
        # Must be read before the with-block closes the buffer.
        errormsg = buf.getvalue()
    self.assertRegex(errormsg, 'Parsing error in the JSON')
    errorlines = errormsg.splitlines()[1:]
    self.assertEqual(len(errorlines), 55)
    # Raw string: '\<' is not a valid escape in a normal string literal
    # and triggers a warning on current Python versions.
    self.assertRegex(errorlines[0],
                     r'Affected portion of XML is 493: \<pb')
def test_members(self):
    """Check that project members are recorded in the converted document.

    Converts the 'json' test file with ``helpers.test_members()`` and
    requires exactly one ``respStmt`` under ``fileDesc/editionStmt`` whose
    id attribute is 'u281' and whose ``name`` text is 'Me M. and I'.
    Every ``lb`` element must then point back to it with ``resp="#u281"``.
    """
    msdata = helpers.load_JSON_file(self.testfiles['json'])
    testdoc = from_sc(msdata,
                      members=helpers.test_members(),
                      special_chars=self.glyphs)
    respstmt = testdoc.xpath(
        '//tei:fileDesc/tei:editionStmt/tei:respStmt',
        namespaces=self.namespaces)
    self.assertEqual(1, len(respstmt))
    self.assertEqual('u281', respstmt[0].get(self.ns('id')))
    self.assertEqual('Me M. and I', respstmt[0].find(self.ns('name')).text)
    for line in testdoc.iter(self.ns('lb')):
        # assertEqual, not the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual('#u281', line.get('resp'))
def setUp(self):
    """Prepare configuration values and converted documents for each test.

    Stores the configuration and the values read from it on the test
    instance, then builds ``testdoc_noglyphs``, ``testdoc`` and
    ``doc3519`` with ``from_sc``.
    """
    self.settings = config()

    ns_map = self.settings['namespaces']
    self.tei_ns, self.xml_ns = ns_map['tei'], ns_map['xml']

    glyph_source = self.settings['armenian_glyphs']
    self.glyphs = helpers.glyph_struct(glyph_source)
    self.testfiles = self.settings['testfiles']

    # One load of the 'json' file serves both conversions below.
    msdata = helpers.load_JSON_file(self.testfiles['json'])
    self.testdoc_noglyphs = from_sc(msdata)
    self.testdoc = from_sc(msdata, special_chars=self.glyphs)

    options_3519 = {
        'special_chars': self.glyphs,
        'numeric_parser': helpers.armenian_numbers,
        'text_filter': helpers.tpen_filter,
    }
    self.doc3519 = from_sc(
        helpers.load_JSON_file(self.testfiles['m3519']), **options_3519)
def test_members(self):
    """Verify the responsibility statement produced from the member list.

    The document converted with ``helpers.test_members()`` must hold a
    single ``respStmt`` under ``fileDesc/editionStmt`` with id 'u281' and
    name 'Me M. and I', and each ``lb`` element must have
    ``resp="#u281"``.
    """
    msdata = helpers.load_JSON_file(self.testfiles['json'])
    testdoc = from_sc(
        msdata,
        members=helpers.test_members(),
        special_chars=self.glyphs
    )
    respstmt = testdoc.xpath(
        '//tei:fileDesc/tei:editionStmt/tei:respStmt',
        namespaces=self.namespaces)
    self.assertEqual(1, len(respstmt))
    self.assertEqual('u281', respstmt[0].get(self.ns('id')))
    self.assertEqual('Me M. and I', respstmt[0].find(self.ns('name')).text)
    for line in testdoc.iter(self.ns('lb')):
        # The assertEquals alias no longer exists in Python 3.12+;
        # assertEqual is the supported spelling.
        self.assertEqual('#u281', line.get('resp'))
def setUp(self):
    """Load the configuration and build the fixtures used by the tests.

    Sets ``namespaces``, ``tei_ns``, ``xml_ns``, ``glyphs`` and
    ``testfiles`` from the configuration; converts the 'json' file into
    ``testdoc`` and the 'legacy' file (with user-defined title and
    author) into ``legacydoc``; and keeps the parsed 'broken' file as
    ``brokendata`` without converting it.
    """
    settings = config()
    namespaces = settings['namespaces']
    self.namespaces = namespaces
    self.tei_ns = namespaces['tei']
    self.xml_ns = namespaces['xml']
    self.glyphs = helpers.glyph_struct(settings['armenian_glyphs'])
    self.testfiles = settings['testfiles']

    msdata = helpers.load_JSON_file(self.testfiles['json'])
    self.testdoc = from_sc(msdata, special_chars=self.glyphs)

    # Metadata supplied by the caller rather than read from the JSON.
    user_defined = {
        'title': 'Ժամանակագրութիւն',
        'author': 'Մատթէոս Ուռհայեցի',
    }
    legacydata = helpers.load_JSON_file(self.testfiles['legacy'])
    self.legacydoc = from_sc(
        legacydata,
        metadata=user_defined,
        special_chars=self.glyphs,
        numeric_parser=helpers.armenian_numbers,
        text_filter=helpers.tpen_filter,
    )

    self.brokendata = helpers.load_JSON_file(self.testfiles['broken'])
def setUp(self):
    """Prepare namespaces, glyphs, test file paths and converted documents.

    Produces ``testdoc`` from the 'json' test file and ``legacydoc`` from
    the 'legacy' test file, the latter with explicit title/author
    metadata.  The 'broken' test file is only loaded, into
    ``brokendata``.
    """
    settings = config()
    self.namespaces = settings['namespaces']
    self.tei_ns, self.xml_ns = (
        settings['namespaces']['tei'],
        settings['namespaces']['xml'],
    )
    self.glyphs = helpers.glyph_struct(settings['armenian_glyphs'])
    self.testfiles = settings['testfiles']

    def load(key):
        # Parse the configured test file registered under the given key.
        return helpers.load_JSON_file(self.testfiles[key])

    msdata = load('json')
    self.testdoc = from_sc(msdata, special_chars=self.glyphs)

    user_defined = {'title': 'Ժամանակագրութիւն',
                    'author': 'Մատթէոս Ուռհայեցի'}
    legacydata = load('legacy')
    legacy_options = {
        'metadata': user_defined,
        'special_chars': self.glyphs,
        'numeric_parser': helpers.armenian_numbers,
        'text_filter': helpers.tpen_filter,
    }
    self.legacydoc = from_sc(legacydata, **legacy_options)

    self.brokendata = load('broken')
# Convert the JSON file named by the first command-line argument with
# tpen2tei's from_sc and write the result to standard output as XML.
import json
import sys

# Extend the import path before importing config -- presumably so that the
# config module resolves from the 'transcription' directory (confirm).
sys.path.append('transcription')

import config
from lxml import etree
from tpen2tei.parse import from_sc
from tpen2tei.wordtokenize import from_etree

with open(sys.argv[1], encoding='utf-8') as jfile:
    msdata = json.load(jfile)

# Every conversion option is taken from the config module.
xmltree = from_sc(
    msdata,
    metadata=config.metadata,
    special_chars=config.special_chars,
    numeric_parser=config.numeric_parser,
    text_filter=config.transcription_filter,
)

# tostring() returns bytes here, so write to the binary stdout buffer.
sys.stdout.buffer.write(
    etree.tostring(
        xmltree,
        encoding='utf-8',
        pretty_print=True,
        xml_declaration=True,
    )
)