def test_raw_xml_18(self):
    """Check two single-position content intervals of parser_test_18.xml.

    The fixture is read from ``self._folder``, parsed with
    ``self._xml_parser`` and each returned interval is used to slice
    ``document.xml``; the slice must equal the expected raw text.
    """
    fixture_path = os.path.join(self._folder, 'parser_test_18.xml')
    with open(fixture_path, 'r', encoding='utf-8') as handle:
        raw = handle.read()
    parsed = self._xml_parser.read(raw)

    # (position, expected raw XML slice), checked in the original order.
    expectations = (
        (1997, 'Premises'),
        (3640, '2.2.2'),
    )
    for position, expected in expectations:
        span = parsed.get_content_interval(Interval(position, position))
        self.assertEqual(parsed.xml[span.start:span.end], expected, 'Tokens not found')
def test_raw_xml_16(self):
    """Check content intervals 81 and 105 of parser_test_16.xml.

    After parsing the fixture, slicing ``document.xml`` with the
    returned intervals must give ``is`` and a double-quote character
    respectively.
    """
    xml_path = os.path.join(self._folder, 'parser_test_16.xml')
    with open(xml_path, 'r', encoding='utf-8') as source:
        raw_xml = source.read()
    doc = self._xml_parser.read(raw_xml)

    first = doc.get_content_interval(Interval(81, 81))
    first_text = doc.xml[first.start:first.end]
    self.assertEqual(first_text, 'is', 'Tokens not found')

    second = doc.get_content_interval(Interval(105, 105))
    second_text = doc.xml[second.start:second.end]
    self.assertEqual(second_text, '"', 'Tokens not found')
def test_raw_xml_3(self):
    """Check that content interval 46 of parser_test_3.xml slices to ``uonsag``."""
    fixture = os.path.join(self._folder, 'parser_test_3.xml')
    with open(fixture, 'r', encoding='utf-8') as stream:
        text = stream.read()
    parsed = self._xml_parser.read(text)

    located = parsed.get_content_interval(Interval(46, 46))
    actual = parsed.xml[located.start:located.end]
    self.assertEqual(actual, 'uonsag')
def test_raw_xml_5(self):
    """Check that content interval 39-43 of parser_test_5.xml maps to the quoted phrase.

    The expected raw XML slice includes the surrounding double quotes.
    """
    expected = '"Class A-1 Notes"'

    path = os.path.join(self._folder, 'parser_test_5.xml')
    with open(path, 'r', encoding='utf-8') as fh:
        payload = fh.read()
    doc = self._xml_parser.read(payload)

    span = doc.get_content_interval(Interval(39, 43))
    self.assertEqual(doc.xml[span.start:span.end], expected)
def test_raw_xml_1(self):
    """Check that content interval 71 of parser_test_1.xml spans inline markup.

    The expected slice of ``document.xml`` contains a closing
    ``</emphasis>`` tag between the characters of the text.
    """
    expected = '1</emphasis>.2'

    fixture_path = os.path.join(self._folder, 'parser_test_1.xml')
    with open(fixture_path, 'r', encoding='utf-8') as reader:
        xml_text = reader.read()
    parsed = self._xml_parser.read(xml_text)

    hit = parsed.get_content_interval(Interval(71, 71))
    observed = parsed.xml[hit.start:hit.end]
    self.assertEqual(observed, expected, 'Raw xml indices are not correct')
def test_raw_xml_17(self):
    """Check that content interval 6-8 of parser_test_17.xml slices to the date text."""
    source_path = os.path.join(self._folder, 'parser_test_17.xml')
    with open(source_path, 'r', encoding='utf-8') as infile:
        body = infile.read()
    doc = self._xml_parser.read(body)

    date_span = doc.get_content_interval(Interval(6, 8))
    date_text = doc.xml[date_span.start:date_span.end]
    self.assertEqual(date_text, '8th February 2008', 'Tokens not found')
def test_raw_xml_4(self):
    """Check content interval 15366-15373 of parser_test_4.xml.

    The expected slice of ``document.xml`` is a section heading that
    still contains its closing ``</emphasis>`` tag.
    """
    expected = 'Section 4.03. Name Change or Relocation</emphasis>.'

    xml_file = os.path.join(self._folder, 'parser_test_4.xml')
    with open(xml_file, 'r', encoding='utf-8') as src:
        contents = src.read()
    parsed = self._xml_parser.read(contents)

    heading = parsed.get_content_interval(Interval(15366, 15373))
    self.assertEqual(parsed.xml[heading.start:heading.end], expected)
def test_raw_xml_15(self):
    """Tails need spaces as well.

    Parses parser_test_15.xml and checks that content interval 36
    slices ``document.xml`` to ``means``.
    """
    fixture = os.path.join(self._folder, 'parser_test_15.xml')
    with open(fixture, 'r', encoding='utf-8') as handle:
        raw = handle.read()
    doc = self._xml_parser.read(raw)

    word = doc.get_content_interval(Interval(36, 36))
    found = doc.xml[word.start:word.end]
    self.assertEqual(found, 'means', 'Tokens not found')
def test_raw_xml_11(self):
    """Long tokens.

    Parses parser_test_11.xml and checks that content interval 241-269
    slices ``document.xml`` to a single run of underscores.
    """
    expected = '_____________________________'

    path = os.path.join(self._folder, 'parser_test_11.xml')
    with open(path, 'r', encoding='utf-8') as stream:
        text = stream.read()
    parsed = self._xml_parser.read(text)

    run = parsed.get_content_interval(Interval(241, 269))
    self.assertEqual(parsed.xml[run.start:run.end], expected, 'Tokens not found')
def test_raw_xml_8(self):
    """Check that content interval 14 of parser_test_8.xml spans a box boundary.

    The expected slice of ``document.xml`` starts in one ``<box>``
    element and ends in the next, so it includes the closing and
    opening markup in between.
    """
    expected = 'S</emphasis></emphasis></box><box top="880" left="557" height="9" width="6" font_size="3" font-family="Times"><emphasis role="italics"><emphasis role="bold">.'

    xml_path = os.path.join(self._folder, 'parser_test_8.xml')
    with open(xml_path, 'r', encoding='utf-8') as reader:
        raw_xml = reader.read()
    doc = self._xml_parser.read(raw_xml)

    span = doc.get_content_interval(Interval(14, 14))
    actual = doc.xml[span.start:span.end]
    self.assertEqual(actual, expected, 'Tokens not found')
def test_raw_xml_7(self):
    """Check that content interval 11-13 of parser_test_7.xml spans a box boundary.

    The expected slice of ``document.xml`` runs from a quote at the end
    of one ``<box>`` element to a quote at the start of the next one.
    """
    expected = '"</box><box top="834" left="22" height="13" width="549" font_size="9" font-family="Times"> "'

    fixture_path = os.path.join(self._folder, 'parser_test_7.xml')
    with open(fixture_path, 'r', encoding='utf-8') as fh:
        data = fh.read()
    parsed = self._xml_parser.read(data)

    located = parsed.get_content_interval(Interval(11, 13))
    self.assertEqual(parsed.xml[located.start:located.end], expected)
def test_raw_xml_2(self):
    """Check that content interval 125-127 of parser_test_2.xml spans inline markup.

    The expected slice of ``document.xml`` contains the emphasis tags
    that sit between the parts of the hyphenated word.
    """
    expected = 'Non</emphasis>-<emphasis role="bold">payment'

    source_path = os.path.join(self._folder, 'parser_test_2.xml')
    with open(source_path, 'r', encoding='utf-8') as infile:
        xml_text = infile.read()
    doc = self._xml_parser.read(xml_text)

    payment_span = doc.get_content_interval(Interval(125, 127))
    observed = doc.xml[payment_span.start:payment_span.end]
    self.assertEqual(observed, expected, 'Raw xml indices are not correct')