示例#1
0
 def parse_cosponsors_from_bill(self, bill, url):
     """Add the cosponsors listed on a bill page to ``bill``.

     Fetches ``url``, takes the first ``//p/span`` node whose text matches
     ``INTRODUCED...`` and splits its cleaned text on commas.  The first
     two comma-separated entries are skipped; every remaining entry is
     split on "and" (any letter case) and each non-empty name is recorded
     with ``bill.add_sponsor('cosponsor', name)``.

     Args:
         bill: object exposing ``add_sponsor(kind, name)``; it is mutated
             in place.
         url: address of the bill page to fetch with ``self.urlopen``.

     Returns:
         None.  Returns early, adding nothing, when no ``INTRODUCED`` node
         is found.
     """
     with self.urlopen(url) as bill_page:
         bill_page = lxml.html.fromstring(bill_page)
         sponsor_nodes = find_nodes_with_matching_text(
             bill_page, '//p/span', r'\s*INTRODUCED.*')
         if not sponsor_nodes:
             # No "INTRODUCED" line: the bill was probably withdrawn.
             return

         sponsors = clean_text(sponsor_nodes[0].text_content()).split(',')
         # The sponsor and the cosponsor were already taken from the
         # previous page, so skip the first two entries.  The slice is
         # empty when there are two entries or fewer.
         for part in sponsors[2:]:
             # The flag is passed via ``flags=`` because an inline ``(?i)``
             # in the middle of a pattern raises re.error on Python 3.11+.
             for sponsor in re.split(r' and ', part, flags=re.IGNORECASE):
                 cosponsor_name = clean_text(sponsor)
                 if not cosponsor_name:
                     continue
                 # Turn non-breaking spaces into plain spaces before the
                 # final split and strip.
                 cosponsor_name = cosponsor_name.replace(u'\u00a0', " ")
                 # Catches "AND" separated by whitespace other than the
                 # single plain spaces the split above requires.
                 for name in re.split(r'\s+AND\s+', cosponsor_name):
                     name = name.strip()
                     if name:
                         bill.add_sponsor('cosponsor', name)
示例#2
0
 def parse_cosponsors_from_bill(self, bill, url):
     """Add the cosponsors listed on a bill page to ``bill``.

     Fetches ``url``, takes the first ``//p/span`` node whose text matches
     ``INTRODUCED...`` and splits its cleaned text on commas.  The first
     two comma-separated entries are skipped; every remaining entry is
     split on "and" (any letter case) and each non-empty name is recorded
     with ``bill.add_sponsor('cosponsor', name)``.

     Args:
         bill: object exposing ``add_sponsor(kind, name)``; it is mutated
             in place.
         url: address of the bill page to fetch with ``self.urlopen``.

     Returns:
         None.  Returns early, adding nothing, when no ``INTRODUCED`` node
         is found.
     """
     bill_page = lxml.html.fromstring(self.urlopen(url))
     sponsor_nodes = find_nodes_with_matching_text(
         bill_page, '//p/span', r'\s*INTRODUCED.*')
     if not sponsor_nodes:
         # No "INTRODUCED" line: the bill was probably withdrawn.
         return

     sponsors = clean_text(sponsor_nodes[0].text_content()).split(',')
     # The sponsor and the cosponsor were already taken from the previous
     # page, so skip the first two entries.  The slice is empty when there
     # are two entries or fewer.
     for part in sponsors[2:]:
         # The flag is passed via ``flags=`` because an inline ``(?i)`` in
         # the middle of a pattern raises re.error on Python 3.11+.
         for sponsor in re.split(r' and ', part, flags=re.IGNORECASE):
             cosponsor_name = clean_text(sponsor)
             if not cosponsor_name:
                 continue
             # Turn non-breaking spaces into plain spaces before the final
             # split and strip.
             cosponsor_name = cosponsor_name.replace(u'\u00a0', " ")
             # Catches "AND" separated by whitespace other than the single
             # plain spaces the split above requires.
             for name in re.split(r'\s+AND\s+', cosponsor_name):
                 name = name.strip()
                 if name:
                     bill.add_sponsor('cosponsor', name)
示例#3
0
 def parse_cosponsors_from_bill(self, bill, url):
     """Add the cosponsors listed on a bill page to ``bill``.

     Fetches ``url``, takes the first ``//p/span`` node whose text matches
     ``INTRODUCED...`` and splits its cleaned text on commas.  The first
     two comma-separated entries are skipped; every remaining entry is
     split on "and" (any letter case) and each non-empty cleaned name is
     recorded with ``bill.add_sponsor('cosponsor', name)``.

     Args:
         bill: object exposing ``add_sponsor(kind, name)``; it is mutated
             in place.
         url: address of the bill page to fetch with ``self.urlopen``.

     Returns:
         None.  Returns early, adding nothing, when no ``INTRODUCED`` node
         is found.
     """
     with self.urlopen(url) as bill_page:
         bill_page = lxml.html.fromstring(bill_page)
         sponsor_nodes = find_nodes_with_matching_text(
             bill_page, '//p/span', r'\s*INTRODUCED.*')
         if not sponsor_nodes:
             # No "INTRODUCED" line: the bill was probably withdrawn.
             return

         sponsors = clean_text(sponsor_nodes[0].text_content()).split(',')
         # The sponsor and the cosponsor were already taken from the
         # previous page, so skip the first two entries.  The slice is
         # empty when there are two entries or fewer.
         for part in sponsors[2:]:
             # The flag is passed via ``flags=`` because an inline ``(?i)``
             # in the middle of a pattern raises re.error on Python 3.11+.
             for sponsor in re.split(r' and ', part, flags=re.IGNORECASE):
                 name = clean_text(sponsor)
                 # Skip blanks (e.g. after a trailing comma) so no empty
                 # cosponsor name is registered.
                 if name:
                     bill.add_sponsor('cosponsor', name)