Python Vote.items примеры использования

Язык программирования: Python

Пространство имен/Пакет: billy.scrape.votes

Класс/Тип: Vote

Метод/Функция: items

Примеров на hotexamples.com: 4

Python Vote.items - 4 примера найдено. Это лучшие примеры Python кода для billy.scrape.votes.Vote.items, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Vote(30)

add_source(30)

no(30)

other(30)

yes(30)

validate(12)

update(4)

strip(3)

items(2)

Пример #1

Показать файл

    def scrape_vote(self,
                    url,
                    date,
                    chamber,
                    passed,
                    motion,
                    re_digit=re.compile(r'\d{1,3}'),
                    re_totals=re.compile(
                        r'(?:Yes|No|Not Voting|Absent):\s{,3}(\d{,3})', re.I)):
        """Scrape a single roll-call page and return it as a ``Vote``.

        Args:
            url: Address of the vote page; also recorded as the vote's source.
            date: Passed through unchanged to the ``Vote`` constructor.
            chamber: Passed through unchanged to the ``Vote`` constructor.
            passed: Passed through unchanged to the ``Vote`` constructor.
            motion: Passed through unchanged to the ``Vote`` constructor.
            re_digit: Compiled pattern used to pull the tallies out of the
                totals row. Bound as a default so it is compiled only once.
            re_totals: Compiled fallback pattern run over the page's plain
                text when the totals row cannot be located.

        Returns:
            The populated ``Vote``, or ``None`` when ``url`` is one of the
            known-bad links and fetching it failed with a 404.

        Raises:
            scrapelib.HTTPError: For any other fetch failure.
            IndexError: If the "Vote Type:" label or its value is missing.
        """
        namespaces = {"re": "http://exslt.org/regular-expressions"}
        try:
            doc = lxml.html.fromstring(self.urlopen(url))
        except scrapelib.HTTPError as e:
            known_fail_links = [
                "http://legis.delaware.gov/LIS/lis146.nsf/7712cf7cc0e9227a852568470077336f/cdfd8149e79c2bb385257a24006e9f7a?OpenDocument"
            ]
            # XXX: The original author found no quick way to read the actual
            #      status from the response object, hence the string sniffing.
            #            -PRT
            if "404" in str(e.response) and url in known_fail_links:
                return
            raise

        xpath = ("//font[re:match(., '^(Yes|No|Not Voting|Absent):', 'i')]"
                 "/ancestor::tr[1]")

        # Get the vote tallies.
        total_rows = doc.xpath(xpath, namespaces=namespaces)
        if total_rows:
            totals = re_digit.findall(total_rows[0].text_content())
        else:
            # The page lacks the typical totals row (maybe it was edited by
            # hand). Log it and try to parse the tallies from plain text.
            self.log('Found an unusual votes page at url: "%s"' % url)
            totals = re_totals.findall(doc.text_content())
            if len(totals) == 4:
                self.log('...was able to parse vote tallies from "%s"' % url)

        try:
            yes_count, no_count, abstentions, absent = [int(t) for t in totals]
        except ValueError:
            # Either a tally was blank or there were not exactly four of
            # them. This is probably a "voice vote" without real tallies.
            yes_count = no_count = other_count = 0
        else:
            other_count = abstentions + absent

        # Create the vote object.
        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count)

        # Add source.
        vote.add_source(url)

        # Get the "vote type": the value sits in the <font> following the
        # label, either as a sibling or as a sibling of the label's parent.
        el = doc.xpath('//font[contains(., "Vote Type:")]')[0]
        try:
            vote_type = el.xpath('following-sibling::font[1]/text()')[0]
        except IndexError:
            vote_type = el.xpath('../following-sibling::font[1]/text()')[0]

        vote['vote_type'] = vote_type

        # Get a flat sequence like: name1, vote1, name2, vote2, ...
        xpath = ("//font[re:match(., '^[A-Z]$')]"
                 "/../../../descendant::td/font/text()")
        data = doc.xpath(xpath, namespaces=namespaces)
        # Build a real list so len() below does not depend on filter()
        # returning a list.
        data = [s for s in data if s.strip()]

        # Handle the rare case where not all names have corresponding
        # text indicating vote value. See e.g. session 146 HB10.
        data_len = len(data) // 2
        tally = sum(v for (k, v) in vote.items() if '_count' in k)

        if (0 < data_len) and (data_len != tally):
            xpath = ("//font[re:match(., '^[A-Z]$')]/ancestor::table")
            els = doc.xpath(xpath, namespaces=namespaces)[-1]
            els = els.xpath('descendant::td')
            data = [e.text_content().strip() for e in els]

        # Add names and vote values.
        vote_map = {
            'Y': 'yes',
            'N': 'no',
        }

        # Zipping one iterator with itself yields (name, vote) pairs and
        # silently drops a trailing name that has no vote value.
        cells = iter(data)
        for name, _vote in zip(cells, cells):
            # Evidently, the motion for vote can be rescinded before
            # the vote is cast, perhaps due to a quorum failure.
            # (See the Senate vote (1/26/2011) for HB 10 w/HA 1.) In
            # this rare case, values in the vote col are whitespace. Skip.
            if not _vote.strip():
                continue

            # Anything other than Y/N is recorded through vote.other().
            _vote = vote_map.get(_vote, 'other')
            getattr(vote, _vote)(name)

        return vote

Пример #2

Показать файл

Файл: bills.py Проект: h4ck3rm1k3/openstates

    def scrape_vote(self, url,
                    re_digit=re.compile(r'\d{1,3}'),
                    re_totals=re.compile(
                        r'(?:Yes|No|Not Voting|Absent):\s{,3}(\d{,3})', re.I)):
        """Scrape a single roll-call page and return it as a ``Vote``.

        The date, outcome, chamber, motion and tallies are all read from the
        page itself.

        Args:
            url: Address of the vote page; also recorded as the vote's source.
            re_digit: No longer used by the body; kept so existing callers
                that pass it keep working.
            re_totals: Compiled pattern run over the page's plain text when
                the usual totals row is missing, to decide what to log.

        Returns:
            The populated ``Vote``, or ``None`` when the page is a committee
            report or when ``url`` is a known-bad link that returned a 404.

        Raises:
            scrapelib.HTTPError: For any other fetch failure.
            ValueError: If the "Date:" label is missing or the date text
                matches neither accepted format.
            IndexError: If the "Vote Type:" label or its value is missing.
        """
        namespaces = {"re": "http://exslt.org/regular-expressions"}
        try:
            html = self.urlopen(url)
            doc = lxml.html.fromstring(html)
        except scrapelib.HTTPError as e:
            known_fail_links = [
                "http://legis.delaware.gov/LIS/lis146.nsf/7712cf7cc0e9227a852568470077336f/cdfd8149e79c2bb385257a24006e9f7a?OpenDocument"
            ]
            # XXX: The original author found no quick way to read the actual
            #      status from the response object, hence the string sniffing.
            #            -PRT
            if "404" in str(e.response) and url in known_fail_links:
                msg = 'Recieved a bogus 22/404 return code. Skipping vote.'
                self.warning(msg)
                return
            raise

        if 'Committee Report' in lxml.html.tostring(doc):
            # This was a committee vote with weird formatting.
            self.info('Skipping committee report.')
            return

        xpath = ("//font[re:match(., '^(Yes|No|Not Voting|Absent):', 'i')]"
                 "/ancestor::tr[1]")

        # The tallies used for the Vote come from the labelled cells read
        # further down; this lookup only flags pages without the usual
        # totals row (maybe edited by hand) in the log.
        if not doc.xpath(xpath, namespaces=namespaces):
            self.warning('Found an unusual votes page at url: "%s"' % url)
            if len(re_totals.findall(doc.text_content())) == 4:
                self.warning('...was able to parse vote tallies from "%s"' %
                             url)

        font_text = [s.strip() for s in doc.xpath('//font/text()')]
        date_index = font_text.index('Date:')
        date_string = font_text[date_index + 2]
        try:
            # %p only shifts the hour when it is paired with %I; with %H the
            # AM/PM marker is ignored and afternoon votes land in the morning.
            date = datetime.strptime(date_string, '%m/%d/%Y %I:%M %p')
        except ValueError:
            # Hours outside 1-12 cannot match %I; fall back to the format
            # this method used before so such pages still parse.
            date = datetime.strptime(date_string, '%m/%d/%Y %H:%M %p')
        # NOTE(review): any non-empty text in this cell counts as passed;
        # confirm that failed votes really leave it empty.
        passed = bool(font_text[date_index + 4])

        # Each tally is read two text nodes after its label; a missing label
        # or non-numeric value leaves that count at the defaultdict's 0.
        counts = defaultdict(int)
        for key, string in [
            ('yes_count', 'Yes:'),
            ('no_count', 'No:'),
            ('absent_count', 'Absent:'),
                ('not_voting', 'Not Voting:')]:
            try:
                counts[key] = int(font_text[font_text.index(string) + 2])
            except ValueError:
                continue
        counts['other_count'] = counts['absent_count'] + counts['not_voting']

        # NOTE(review): chamber stays unbound when the heading names neither
        # chamber, so the Vote(...) call below then raises UnboundLocalError.
        chamber_string = doc.xpath('string(//b/u/font/text())').lower()
        if 'senate' in chamber_string:
            chamber = 'upper'
        elif 'house' in chamber_string:
            chamber = 'lower'

        # Take the first location that yields a non-empty motion string.
        for xpath in (
            'string(//td/b/text())',
            'string(//td/b/font/text())',
                'string(//form/b/font/text())'):
            motion = doc.xpath(xpath)
            if motion:
                break
            # Per the original author, an empty motion is expected to be
            # rejected later at the validictory (validation) level.

        # Create the vote object.
        vote = Vote(chamber, date, motion, passed,
                    counts['yes_count'], counts['no_count'],
                    counts['other_count'])

        # Add source.
        vote.add_source(url)

        # Get the "vote type": the value sits in the <font> following the
        # label, either as a sibling or as a sibling of the label's parent.
        el = doc.xpath('//font[contains(., "Vote Type:")]')[0]
        try:
            vote_type = el.xpath('following-sibling::font[1]/text()')[0]
        except IndexError:
            vote_type = el.xpath('../following-sibling::font[1]/text()')[0]

        vote['vote_type'] = vote_type

        # Get a flat sequence like: name1, vote1, name2, vote2, ...
        xpath = ("//font[re:match(., '^[A-Z]$')]"
                 "/../../../descendant::td/font/text()")
        data = doc.xpath(xpath, namespaces=namespaces)
        # Build a real list so len() below does not depend on filter()
        # returning a list.
        data = [s for s in data if s.strip()]

        # Handle the rare case where not all names have corresponding
        # text indicating vote value. See e.g. session 146 HB10.
        data_len = len(data) // 2
        tally = sum(v for (k, v) in vote.items() if '_count' in k)

        if (0 < data_len) and (data_len != tally):
            xpath = ("//font[re:match(., '^[A-Z]$')]/ancestor::table")
            els = doc.xpath(xpath, namespaces=namespaces)[-1]
            els = els.xpath('descendant::td')
            data = [e.text_content().strip() for e in els]

        # Add names and vote values.
        vote_map = {
            'Y': 'yes',
            'N': 'no',
        }

        # Zipping one iterator with itself yields (name, vote) pairs and
        # silently drops a trailing name that has no vote value.
        cells = iter(data)
        for name, _vote in zip(cells, cells):
            # Evidently, the motion for vote can be rescinded before
            # the vote is cast, perhaps due to a quorum failure.
            # (See the Senate vote (1/26/2011) for HB 10 w/HA 1.) In
            # this rare case, values in the vote col are whitespace. Skip.
            if not _vote.strip():
                continue

            # Anything other than Y/N is recorded through vote.other().
            _vote = vote_map.get(_vote, 'other')
            getattr(vote, _vote)(name)

        return vote

Пример #3

Показать файл

Файл: bills.py Проект: VersaHQ/openstates

    def scrape_vote(self, url, date, chamber, passed, motion,
                    re_digit=re.compile(r'\d{1,3}'),
                    re_totals=re.compile(
                        r'(?:Yes|No|Not Voting|Absent):\s{,3}(\d{,3})', re.I)):
        """Build a ``Vote`` from the roll-call page at ``url``.

        Args:
            url: Address of the vote page; also recorded as the vote's source.
            date: Handed to the ``Vote`` constructor as-is.
            chamber: Handed to the ``Vote`` constructor as-is.
            passed: Handed to the ``Vote`` constructor as-is.
            motion: Handed to the ``Vote`` constructor as-is.
            re_digit: Compiled pattern that extracts the tallies from the
                totals row. Bound as a default so it is compiled only once.
            re_totals: Compiled fallback pattern applied to the page's plain
                text when no totals row is found.

        Returns:
            The populated ``Vote``, or ``None`` when ``url`` is one of the
            known-bad links and fetching it failed with a 404.

        Raises:
            scrapelib.HTTPError: For any other fetch failure.
            IndexError: If the "Vote Type:" label or its value is missing.
        """
        namespaces = {"re": "http://exslt.org/regular-expressions"}
        try:
            doc = lxml.html.fromstring(self.urlopen(url))
        except scrapelib.HTTPError as e:
            known_fail_links = [
                "http://legis.delaware.gov/LIS/lis146.nsf/7712cf7cc0e9227a852568470077336f/cdfd8149e79c2bb385257a24006e9f7a?OpenDocument"
            ]
            # XXX: The original author found no quick way to read the actual
            #      status from the response object, hence the string sniffing.
            #            -PRT
            if "404" in str(e.response) and url in known_fail_links:
                return
            raise

        xpath = ("//font[re:match(., '^(Yes|No|Not Voting|Absent):', 'i')]"
                 "/ancestor::tr[1]")

        # Get the vote tallies.
        total_rows = doc.xpath(xpath, namespaces=namespaces)
        if total_rows:
            totals = re_digit.findall(total_rows[0].text_content())
        else:
            # The page lacks the typical totals row (maybe it was edited by
            # hand). Log it and try to parse the tallies from plain text.
            self.log('Found an unusual votes page at url: "%s"' % url)
            totals = re_totals.findall(doc.text_content())
            if len(totals) == 4:
                self.log('...was able to parse vote tallies from "%s"' % url)

        try:
            yes_count, no_count, abstentions, absent = [int(t) for t in totals]
        except ValueError:
            # Either a tally was blank or there were not exactly four of
            # them. This is probably a "voice vote" without real tallies.
            yes_count = no_count = other_count = 0
        else:
            other_count = abstentions + absent

        # Create the vote object.
        vote = Vote(chamber, date, motion, passed,
                    yes_count, no_count, other_count)

        # Add source.
        vote.add_source(url)

        # Get the "vote type": the value sits in the <font> following the
        # label, either as a sibling or as a sibling of the label's parent.
        el = doc.xpath('//font[contains(., "Vote Type:")]')[0]
        try:
            vote_type = el.xpath('following-sibling::font[1]/text()')[0]
        except IndexError:
            vote_type = el.xpath('../following-sibling::font[1]/text()')[0]

        vote['vote_type'] = vote_type

        # Get a flat sequence like: name1, vote1, name2, vote2, ...
        xpath = ("//font[re:match(., '^[A-Z]$')]"
                 "/../../../descendant::td/font/text()")
        data = doc.xpath(xpath, namespaces=namespaces)
        # Build a real list so len() below does not depend on filter()
        # returning a list.
        data = [s for s in data if s.strip()]

        # Handle the rare case where not all names have corresponding
        # text indicating vote value. See e.g. session 146 HB10.
        data_len = len(data) // 2
        tally = sum(v for (k, v) in vote.items() if '_count' in k)

        if (0 < data_len) and (data_len != tally):
            xpath = ("//font[re:match(., '^[A-Z]$')]/ancestor::table")
            els = doc.xpath(xpath, namespaces=namespaces)[-1]
            els = els.xpath('descendant::td')
            data = [e.text_content().strip() for e in els]

        # Add names and vote values.
        vote_map = {
            'Y': 'yes',
            'N': 'no',
        }

        # Zipping one iterator with itself yields (name, vote) pairs and
        # silently drops a trailing name that has no vote value.
        cells = iter(data)
        for name, _vote in zip(cells, cells):
            # Evidently, the motion for vote can be rescinded before
            # the vote is cast, perhaps due to a quorum failure.
            # (See the Senate vote (1/26/2011) for HB 10 w/HA 1.) In
            # this rare case, values in the vote col are whitespace. Skip.
            if not _vote.strip():
                continue

            # Anything other than Y/N is recorded through vote.other().
            _vote = vote_map.get(_vote, 'other')
            getattr(vote, _vote)(name)

        return vote

Пример #4

Показать файл

    def scrape_vote(self,
                    url,
                    re_digit=re.compile(r'\d{1,3}'),
                    re_totals=re.compile(
                        r'(?:Yes|No|Not Voting|Absent):\s{,3}(\d{,3})', re.I)):
        """Build a ``Vote`` from the roll-call page at ``url``.

        The date, outcome, chamber, motion and tallies are all read from the
        page itself.

        Args:
            url: Address of the vote page; also recorded as the vote's source.
            re_digit: No longer used by the body; kept so existing callers
                that pass it keep working.
            re_totals: Compiled pattern run over the page's plain text when
                the usual totals row is missing, to decide what to log.

        Returns:
            The populated ``Vote``, or ``None`` when the page is a committee
            report or when ``url`` is a known-bad link that returned a 404.

        Raises:
            scrapelib.HTTPError: For any other fetch failure.
            ValueError: If the "Date:" label is missing or the date text
                matches neither accepted format.
            IndexError: If the "Vote Type:" label or its value is missing.
        """
        namespaces = {"re": "http://exslt.org/regular-expressions"}
        try:
            html = self.urlopen(url)
            doc = lxml.html.fromstring(html)
        except scrapelib.HTTPError as e:
            known_fail_links = [
                "http://legis.delaware.gov/LIS/lis146.nsf/7712cf7cc0e9227a852568470077336f/cdfd8149e79c2bb385257a24006e9f7a?OpenDocument",
                'http://legis.delaware.gov/LIS/lis147.nsf/7712cf7cc0e9227a852568470077336f/5f86852ea6649fa285257d08001bbe06?OpenDocument'
            ]
            # XXX: The original author found no quick way to read the actual
            #      status from the response object, hence the string sniffing.
            #            -PRT
            #      (A later maintainer, TWN, blames Delaware instead.)
            if "404" in str(e.response) and url in known_fail_links:
                msg = 'Recieved a bogus 22/404 return code. Skipping vote.'
                self.warning(msg)
                return
            raise

        if 'Committee Report' in lxml.html.tostring(doc):
            # This was a committee vote with weird formatting.
            self.info('Skipping committee report.')
            return

        xpath = ("//font[re:match(., '^(Yes|No|Not Voting|Absent):', 'i')]"
                 "/ancestor::tr[1]")

        # The tallies used for the Vote come from the labelled cells read
        # further down; this lookup only flags pages without the usual
        # totals row (maybe edited by hand) in the log.
        if not doc.xpath(xpath, namespaces=namespaces):
            self.warning('Found an unusual votes page at url: "%s"' % url)
            if len(re_totals.findall(doc.text_content())) == 4:
                self.warning('...was able to parse vote tallies from "%s"' %
                             url)

        font_text = [s.strip() for s in doc.xpath('//font/text()')]
        date_index = font_text.index('Date:')
        date_string = font_text[date_index + 2]
        try:
            # %p only shifts the hour when it is paired with %I; with %H the
            # AM/PM marker is ignored and afternoon votes land in the morning.
            date = datetime.strptime(date_string, '%m/%d/%Y %I:%M %p')
        except ValueError:
            # Hours outside 1-12 cannot match %I; fall back to the format
            # this method used before so such pages still parse.
            date = datetime.strptime(date_string, '%m/%d/%Y %H:%M %p')
        # NOTE(review): any non-empty text in this cell counts as passed;
        # confirm that failed votes really leave it empty.
        passed = bool(font_text[date_index + 4])

        # Each tally is read two text nodes after its label; a missing label
        # or non-numeric value leaves that count at the defaultdict's 0.
        counts = defaultdict(int)
        for key, string in [('yes_count', 'Yes:'), ('no_count', 'No:'),
                            ('absent_count', 'Absent:'),
                            ('not_voting', 'Not Voting:')]:
            try:
                counts[key] = int(font_text[font_text.index(string) + 2])
            except ValueError:
                continue
        counts['other_count'] = counts['absent_count'] + counts['not_voting']

        # NOTE(review): chamber stays unbound when the heading names neither
        # chamber, so the Vote(...) call below then raises UnboundLocalError.
        chamber_string = doc.xpath('string(//b/u/font/text())').lower()
        if 'senate' in chamber_string:
            chamber = 'upper'
        elif 'house' in chamber_string:
            chamber = 'lower'

        # Take the first location that yields a non-empty motion string.
        for xpath in ('string(//td/b/text())', 'string(//td/b/font/text())',
                      'string(//form/b/font/text())'):
            motion = doc.xpath(xpath)
            if motion:
                break
            # Per the original author, an empty motion is expected to be
            # rejected later at the validictory (validation) level.

        # Create the vote object.
        vote = Vote(chamber, date, motion, passed, counts['yes_count'],
                    counts['no_count'], counts['other_count'])

        # Add source.
        vote.add_source(url)

        # Get the "vote type": the value sits in the <font> following the
        # label, either as a sibling or as a sibling of the label's parent.
        el = doc.xpath('//font[contains(., "Vote Type:")]')[0]
        try:
            vote_type = el.xpath('following-sibling::font[1]/text()')[0]
        except IndexError:
            vote_type = el.xpath('../following-sibling::font[1]/text()')[0]

        vote['vote_type'] = vote_type

        # Get a flat sequence like: name1, vote1, name2, vote2, ...
        xpath = ("//font[re:match(., '^[A-Z]$')]"
                 "/../../../descendant::td/font/text()")
        data = doc.xpath(xpath, namespaces=namespaces)
        # Build a real list so len() below does not depend on filter()
        # returning a list.
        data = [s for s in data if s.strip()]

        # Handle the rare case where not all names have corresponding
        # text indicating vote value. See e.g. session 146 HB10.
        data_len = len(data) // 2
        tally = sum(v for (k, v) in vote.items() if '_count' in k)

        if (0 < data_len) and (data_len != tally):
            xpath = ("//font[re:match(., '^[A-Z]$')]/ancestor::table")
            els = doc.xpath(xpath, namespaces=namespaces)[-1]
            els = els.xpath('descendant::td')
            data = [e.text_content().strip() for e in els]

        # Add names and vote values.
        vote_map = {
            'Y': 'yes',
            'N': 'no',
        }

        # Zipping one iterator with itself yields (name, vote) pairs and
        # silently drops a trailing name that has no vote value.
        cells = iter(data)
        for name, _vote in zip(cells, cells):
            # Evidently, the motion for vote can be rescinded before
            # the vote is cast, perhaps due to a quorum failure.
            # (See the Senate vote (1/26/2011) for HB 10 w/HA 1.) In
            # this rare case, values in the vote col are whitespace. Skip.
            if not _vote.strip():
                continue

            # Anything other than Y/N is recorded through vote.other().
            _vote = vote_map.get(_vote, 'other')
            getattr(vote, _vote)(name)

        return vote