Example #1
def actions_to_events(state):
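    # Mirror each bill action as an Event record keyed by a deterministic
    # _guid, so the conversion can be re-run without creating duplicates.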
    for bill in db.bills.find({'state': state}):
        print "Converting %s actions to events" % bill['_id']

        count = 1
        for action in bill['actions']:
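            # Stable GUID: bill id plus a 1-based action counter, so the
            # lookup below finds any previously converted event.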
            guid = "%s:action:%06d" % (bill['_id'], count)
            count += 1

            event = db.events.find_one({'state': state,
                                        '_guid': guid})

            description = "%s: %s" % (bill['bill_id'], action['action'])
            data = Event(bill['session'], action['date'],
                         'bill:action', description)
            data.add_participant('actor', action['actor'])
            data['_guid'] = guid
            data['state'] = state

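            # Brand-new events get timestamps and an id; existing ones are
            # updated in place.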
            if not event:
                data['created_at'] = datetime.datetime.utcnow()
                data['updated_at'] = data['created_at']
                _insert_with_id(data)
            else:
                update(event, data, db.events)
Example #2
    def scrape_house_weekly_schedule(self, session):
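        # Scrape the House weekly schedule page and emit a committee:meeting
        # Event for each linked agenda.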
        url = "http://house.louisiana.gov/H_Sched/Hse_Sched_Weekly.htm"

        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)
            page.make_links_absolute(url)

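            # Agendas are linked via an image with alt text 'See Agenda in pdf';
            # the xpath climbs to the parent <a>, whose href is kept as the link.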
            for link in page.xpath("//img[@alt = 'See Agenda in pdf']/.."):
                guid = link.attrib['href']

                committee = link.xpath("string(../../../td[1])").strip()

                when_and_where = link.xpath("string(../../../td[2])").strip()

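                # The second cell holds the meeting time and, after the final
                # comma, the location.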
                location = when_and_where.split(',')[-1]
                when = parse_datetime(when_and_where, session)

                description = 'Committee Meeting: %s' % committee

                event = Event(session, when, 'committee:meeting',
                              description, location=location)
                event.add_participant('committee', committee)
                event['link'] = guid

                self.save_event(event)
Example #3
 def scrape_committee_agendas(self, chamber, session):
     """
     Scrape upper or lower committee agendas
     """
     # could use &ShowAll=ON, but it doesn't seem to work
     url = 'http://www.azleg.gov/CommitteeAgendas.asp?Body=%s' % \
                                       self._chamber_short[chamber]
     with self.urlopen(url) as agendas:
         root = html.fromstring(agendas)
         if chamber == 'upper':
             event_table = root.xpath('//table[@id="body"]/tr/td/table[2]/tr'
                                      '/td/table/tr/td/table')[0]
         else:
             event_table = root.xpath('//table[@id="body"]/tr/td/table[2]/tr'
                                      '/td/table/tr/td/table/tr/td/table')[0]
         for row in event_table.xpath('tr')[2:]:
             # Agenda Date, Committee, Revised, Addendum, Cancelled, Time, Room,
             # HTML Document, PDF Document for house
             # Agenda Date, Committee, Revised, Cancelled, Time, Room,
             # HTML Document, PDF Document for senate
             text = [x.text_content().strip() for x in row.xpath('td')]
             when, committee = text[0:2]
             if chamber == 'upper':
                 time, room = text[4:6]
                 link = row[6].xpath('string(a/@href)')
             else:
                 time, room = text[5:7]
                 link = row[7].xpath('string(a/@href)')
             if 'NOT MEETING' in time or 'CANCELLED' in time:
                 continue
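             # The time cell is either "H:MM A/P" (combined with the agenda
             # date) or free text, in which case only the date is parsed.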
             time = re.match(r'(\d+:\d+ (A|P))', time)
             if time:
                 when = "%s %sM" % (text[0], time.group(0))
                 when = datetime.datetime.strptime(when, '%m/%d/%Y %I:%M %p')
             else:
                 when = text[0]
                 when = datetime.datetime.strptime(when, '%m/%d/%Y')

             when = self._tz.localize(when)
             
             title = "Committee Meeting:\n%s %s %s\n" % (
                                               self._chamber_long[chamber], 
                                               committee, room)
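             # The linked agenda document supplies the description, member
             # list, and meeting type.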
             (description, member_list, 
              meeting_type, other) = self.parse_agenda(chamber, link)
             event = Event(session, when, 'committee:meeting', title,
                           location=room, link=link, details=description)
             event.add_participant('committee', committee)
             event['participants'].extend(member_list)
             event.add_source(url)
             event.add_source(link)
             self.save_event(event)
Example #4
    def scrape_committee_upcoming(self, session, chamber):
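        # Walk the chamber's "upcoming meetings" RSS feed and emit a
        # committee:meeting Event per entry.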
        chamber_name = {'upper': 'senate',
                        'lower': 'house'}[chamber]
        url = ("http://www.capitol.state.tx.us/MyTLO/RSS/RSS.aspx?"
               "Type=upcomingmeetings%s" % chamber_name)

        with self.urlopen(url) as page:
            feed = feedparser.parse(page)

            for entry in feed['entries']:
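                # Titles look like "<committee> - <date>"; the description text
                # carries the meeting time and location.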
                title, date = entry['title'].split(' - ')

                time = re.match(r'Time: (\d+:\d+ (A|P)M)',
                                entry['description']).group(1)

                when = "%s %s" % (date, time)
                when = datetime.datetime.strptime(when, '%m/%d/%Y %I:%M %p')
                when = self._tz.localize(when)

                location = entry['description'].split('Location: ')[1]

                description = 'Committee Meeting\n'
                description += entry['title'] + '\n'
                description += entry['description']

                event = Event(session, when, 'committee:meeting',
                              description,
                              location=location)
                event.add_participant('committee', title)

                event['_guid'] = entry['guid']
                event['link'] = entry['link']

                event.add_source(url)

                self.save_event(event)