示例#1
0
    def test_agencies_with_bad_footnotes(self):
        """An agency citing an undefined footnote gets the placeholder text.

        The table row references footnote 2, while the notes list only
        defines footnote 1.
        """
        html = (
            '<table class="data"><tbody><tr><th>West Covina<sup>2</sup></tr></tbody></table>'
            '<ul class="tablenotes">'
            '<li><sup>1</sup> The figures shown in this column for the offense of rape '
            'were reported using the revised Uniform Crime Reporting (UCR) definition of rape. '
            'See Data Declaration for further explanation.</li>'
            '</ul>'
        )

        parsed = BeautifulSoup(html, 'html.parser')
        known_notes = util.footnotes_hash(parsed)
        flagged = util.agencies_with_footnotes(parsed, known_notes)

        assert len(flagged) == 1
        assert flagged[0] == ['West Covina', '[BAD FOOTNOTE]']
示例#2
0
    def test_agencies_with_no_footnotes(self):
        """An agency row without a footnote marker is not reported at all."""
        # NOTE(review): the third footnote's text carries no apostrophes
        # ("agencys", "years data"); confirm whether that is intentional.
        html = (
            '<table class="data"><tbody><tr><th>West Covina</tr></tbody></table>'
            '<ul class="tablenotes">'
            '<li><sup>1</sup> The figures shown in this column for the offense of rape '
            'were reported using the revised Uniform Crime Reporting (UCR) definition of rape. '
            'See Data Declaration for further explanation.</li>'
            '<li><sup>2</sup> The figures shown in this column for the offense of rape '
            'were reported using the legacy UCR definition of rape. '
            'See Data Declaration for further explanation.</li>'
            '<li><sup>3</sup> Because of changes in the state/local agencys reporting practices, '
            'figures are not comparable to previous years data.</li>'
            '</ul>'
        )

        parsed = BeautifulSoup(html, 'html.parser')
        known_notes = util.footnotes_hash(parsed)
        flagged = util.agencies_with_footnotes(parsed, known_notes)

        assert len(flagged) == 0
示例#3
0
    def test_footnotes_hash_create(self):
        """Three listed footnotes are indexed by their integer marker."""
        # NOTE(review): the third footnote's text carries no apostrophes
        # ("agencys", "years data"); confirm whether that is intentional.
        html = (
            '<ul class="tablenotes">'
            '<li><sup>1</sup> The figures shown in this column for the offense of rape '
            'were reported using the revised Uniform Crime Reporting (UCR) definition of rape. '
            'See Data Declaration for further explanation.</li>'
            '<li><sup>2</sup> The figures shown in this column for the offense of rape '
            'were reported using the legacy UCR definition of rape. '
            'See Data Declaration for further explanation.</li>'
            '<li><sup>3</sup> Because of changes in the state/local agencys reporting practices, '
            'figures are not comparable to previous years data.</li>'
            '</ul>'
        )
        expected_second = (
            'The figures shown in this column for the offense of rape '
            'were reported using the legacy UCR definition of rape. '
            'See Data Declaration for further explanation.'
        )

        parsed = BeautifulSoup(html, 'html.parser')
        notes_by_number = util.footnotes_hash(parsed)

        assert len(notes_by_number) == 3
        assert sorted(notes_by_number.keys()) == [1, 2, 3]
        assert notes_by_number[2] == expected_second
示例#4
0
 def run(self):
     """Parse the input HTML and write one CSV row per footnoted agency.

     Each row is: year, year, state abbreviation, None, agency type,
     agency name, footnote text. The year is written in two columns.
     """
     with self.input().open('r') as html_file:
         document = BeautifulSoup(html_file, 'html.parser')
         # Footnotes must be collected first; the agency lookup needs them.
         notes_by_number = util.footnotes_hash(document)
         footnoted_agencies = util.agencies_with_footnotes(document, notes_by_number)
         # NOTE(review): 'wb' is passed to the output target as-is; whether
         # csv.writer receives a text or binary stream depends on that
         # target's open() implementation - confirm.
         with self.output().open('wb') as out_file:
             # The generator is consumed lazily, so each row is still built
             # immediately before it is written.
             csv.writer(out_file).writerows(
                 [
                     self.year, self.year,
                     util.state_abbr(self.state), None,
                     util.agency_type(self.table), name, note,
                 ]
                 for name, note in footnoted_agencies
             )
示例#5
0
 def test_footnotes_hash_empty(self):
     """A document with no markup yields an empty footnote mapping."""
     blank_document = BeautifulSoup('', 'html.parser')
     assert len(util.footnotes_hash(blank_document)) == 0