def test_section_1_parser_missing_meta(self):
    """Parse SECTION_1_SAMPLE_2 and verify the single document it yields.

    The expected ``pub_month`` is ``None`` while the year and day are set;
    presumably this is the "missing meta" case the test name refers to.
    """
    result = list(section_1_parser(StringIO(SECTION_1_SAMPLE_2)))
    self.assertEqual(1, len(result))

    document = result[0]
    self.assertEqual('http://www.adressa.no/artikkel.awml?artikkelref=907584799', document['url'])
    self.assertEqual('AA', document['source'])
    self.assertEqual(98, document['pub_year'])
    # Identity check: the month must be None itself, not merely a value
    # that happens to compare equal to None.
    self.assertIsNone(document['pub_month'])
    self.assertEqual(10, document['pub_day'])
    self.assertEqual(1, document['corpus_section'])
    self.assertEqual(['Ap', ':'], document['tokens'])
def test_section_1_parser_missing_meta(self):
    """Check the one document parsed from SECTION_1_SAMPLE_2.

    Year and day are expected to be populated while ``pub_month`` must be
    ``None``; presumably that absent month is the "missing meta" of the
    test name.
    """
    result = list(section_1_parser(StringIO(SECTION_1_SAMPLE_2)))
    self.assertEqual(1, len(result))

    parsed = result[0]
    self.assertEqual("http://www.adressa.no/artikkel.awml?artikkelref=907584799", parsed["url"])
    self.assertEqual("AA", parsed["source"])
    self.assertEqual(98, parsed["pub_year"])
    # assertIsNone checks identity and reports a clearer failure message
    # than comparing against None with assertEqual.
    self.assertIsNone(parsed["pub_month"])
    self.assertEqual(10, parsed["pub_day"])
    self.assertEqual(1, parsed["corpus_section"])
    self.assertEqual(["Ap", ":"], parsed["tokens"])
def test_section_1_parser(self):
    """Parse SECTION_1_SAMPLE_1 and compare both documents field by field."""
    documents = list(section_1_parser(StringIO(SECTION_1_SAMPLE_1)))
    self.assertEqual(2, len(documents))

    # One entry per expected document; key order is the order in which the
    # fields are checked.
    expected_documents = [
        {
            'url': 'http://odin.dep.no/fd/prm/1998/k4/981013.html',
            'source': 'OD',
            'pub_year': 98,
            'pub_month': 10,
            'pub_day': 13,
            'corpus_section': 1,
            'tokens': ['Pressemelding', 'Nr', '.', '064/98', 'Dato', ':', '13', 'oktober', '1998'],
        },
        {
            'url': 'http://odin.dep.no/fid/prm/1998/k4/981016.html',
            'source': 'OD',
            'pub_year': 98,
            'pub_month': 10,
            'pub_day': 16,
            'corpus_section': 1,
            'tokens': ['Pressemelding', 'Nr', '.', '55/98', 'Dato', '16', '.', 'oktober', '1998'],
        },
    ]

    for expected, actual in zip(expected_documents, documents):
        for field, value in expected.items():
            self.assertEqual(value, actual[field])
def test_section_1_parser(self):
    """Verify the two documents produced from SECTION_1_SAMPLE_1."""

    def check_document(document, url, pub_day, tokens):
        # Both expected documents share source, year, month and corpus
        # section; only the URL, day and token list differ.
        self.assertEqual(url, document["url"])
        self.assertEqual("OD", document["source"])
        self.assertEqual(98, document["pub_year"])
        self.assertEqual(10, document["pub_month"])
        self.assertEqual(pub_day, document["pub_day"])
        self.assertEqual(1, document["corpus_section"])
        self.assertEqual(tokens, document["tokens"])

    parsed = list(section_1_parser(StringIO(SECTION_1_SAMPLE_1)))
    self.assertEqual(2, len(parsed))
    first, second = parsed

    check_document(
        first,
        "http://odin.dep.no/fd/prm/1998/k4/981013.html",
        13,
        ["Pressemelding", "Nr", ".", "064/98", "Dato", ":", "13", "oktober", "1998"],
    )
    check_document(
        second,
        "http://odin.dep.no/fid/prm/1998/k4/981016.html",
        16,
        ["Pressemelding", "Nr", ".", "55/98", "Dato", "16", ".", "oktober", "1998"],
    )