示例#1
0
 def test_section_1_parser_missing_meta(self):
     """Check the single record parsed from SECTION_1_SAMPLE_2.

     The record is expected to have ``None`` as its publication month,
     while the remaining metadata fields and the token list are populated.
     """
     records = list(section_1_parser(StringIO(SECTION_1_SAMPLE_2)))
     self.assertEqual(1, len(records))

     record = records[0]
     # Ordered (key, expected value) pairs; each is asserted separately so
     # the first mismatch is reported on its own.
     expected_fields = (
         ('url', 'http://www.adressa.no/artikkel.awml?artikkelref=907584799'),
         ('source', 'AA'),
         ('pub_year', 98),
         ('pub_month', None),
         ('pub_day', 10),
         ('corpus_section', 1),
         ('tokens', ['Ap', ':']),
     )
     for key, expected in expected_fields:
         self.assertEqual(expected, record[key])
 def test_section_1_parser_missing_meta(self):
     """Parse SECTION_1_SAMPLE_2 and verify the one record it yields.

     ``pub_month`` is expected to be ``None``; all other fields must match
     the values listed below.
     """
     parsed = list(section_1_parser(StringIO(SECTION_1_SAMPLE_2)))
     self.assertEqual(1, len(parsed))

     only_record = parsed[0]
     # Field checks run in this exact order, one assertion per field.
     for field_name, wanted in (
         ("url", "http://www.adressa.no/artikkel.awml?artikkelref=907584799"),
         ("source", "AA"),
         ("pub_year", 98),
         ("pub_month", None),
         ("pub_day", 10),
         ("corpus_section", 1),
         ("tokens", ["Ap", ":"]),
     ):
         self.assertEqual(wanted, only_record[field_name])
示例#3
0
    def test_section_1_parser(self):
        """Check both records parsed from SECTION_1_SAMPLE_1.

        Each record's URL, source, publication date parts, corpus section
        and token list are compared against the expected values, record by
        record and field by field.
        """
        records = list(section_1_parser(StringIO(SECTION_1_SAMPLE_1)))
        self.assertEqual(2, len(records))

        # One entry per expected record, each an ordered sequence of
        # (key, expected value) pairs.
        expected_records = (
            (
                ('url', 'http://odin.dep.no/fd/prm/1998/k4/981013.html'),
                ('source', 'OD'),
                ('pub_year', 98),
                ('pub_month', 10),
                ('pub_day', 13),
                ('corpus_section', 1),
                ('tokens', ['Pressemelding', 'Nr', '.', '064/98', 'Dato', ':', '13', 'oktober', '1998']),
            ),
            (
                ('url', 'http://odin.dep.no/fid/prm/1998/k4/981016.html'),
                ('source', 'OD'),
                ('pub_year', 98),
                ('pub_month', 10),
                ('pub_day', 16),
                ('corpus_section', 1),
                ('tokens', ['Pressemelding', 'Nr', '.', '55/98', 'Dato', '16', '.', 'oktober', '1998']),
            ),
        )
        # The length assertion above guarantees zip pairs every record.
        for record, expected_fields in zip(records, expected_records):
            for key, expected in expected_fields:
                self.assertEqual(expected, record[key])
    def test_section_1_parser(self):
        """Parse SECTION_1_SAMPLE_1 and verify the two records it yields.

        For each record, the URL, source, publication year/month/day,
        corpus section and tokens must equal the expected values.
        """
        parsed = list(section_1_parser(StringIO(SECTION_1_SAMPLE_1)))
        self.assertEqual(2, len(parsed))

        # Expected (field, value) pairs, listed in the order they are checked.
        first_expected = (
            ("url", "http://odin.dep.no/fd/prm/1998/k4/981013.html"),
            ("source", "OD"),
            ("pub_year", 98),
            ("pub_month", 10),
            ("pub_day", 13),
            ("corpus_section", 1),
            ("tokens", ["Pressemelding", "Nr", ".", "064/98", "Dato", ":", "13", "oktober", "1998"]),
        )
        second_expected = (
            ("url", "http://odin.dep.no/fid/prm/1998/k4/981016.html"),
            ("source", "OD"),
            ("pub_year", 98),
            ("pub_month", 10),
            ("pub_day", 16),
            ("corpus_section", 1),
            ("tokens", ["Pressemelding", "Nr", ".", "55/98", "Dato", "16", ".", "oktober", "1998"]),
        )

        for index, wanted_fields in enumerate((first_expected, second_expected)):
            for field_name, wanted in wanted_fields:
                self.assertEqual(wanted, parsed[index][field_name])