def test_basic_agenda():
    """An event validates both before and after gaining an agenda item."""
    event = Event(
        name="get-together",
        when=dt.datetime.utcnow(),
        location="Joe's Place",
    )
    event.add_source(url='foobar')
    # Baseline: the bare event (with a source) must already be valid.
    event.validate()

    item = event.add_agenda_item("foo bar")
    assert item
    # Adding the agenda item must not invalidate the event.
    event.validate()
def get_events(self):
    """Yield an Event for each committee-event entry on the listing page.

    The listing URL is built for a window that starts 10 days before the
    current UTC time and ends 30 days after that start. For every ``<li>``
    under the ``committee-events`` list, the ``onclick`` attribute of its
    first ``<span>`` is matched against ``CLICK_INFO``; entries that do not
    match are skipped. The captured ``info_id`` is used to load a pop-over
    page that supplies the event name (first ``<h1>``), its time (first
    ``<strong>``) and, via ``ORD_INFO`` matches in ``<div>`` text, the
    related bills.

    Raises:
        Exception: if ``self.session`` is not the current session.
    """
    if self.session != self.get_current_session():
        raise Exception("Can't do that, dude")

    start = dt.datetime.utcnow() - dt.timedelta(days=10)
    end = start + dt.timedelta(days=30)
    url = URL.format(**{
        "from": start.strftime("%Y/%m/%d"),
        "til": end.strftime("%Y/%m/%d"),
    })

    page = self.lxmlize(url)
    events = page.xpath("//ul[contains(@class, 'committee-events')]//li")
    for event in events:
        po = CLICK_INFO.match(event.xpath(".//span")[0].attrib['onclick'])
        if po is None:
            continue

        # This id is used to fetch the pop-over page with more details.
        poid = po.groupdict()['info_id']
        popage = self.popOverUrl(poid)

        when = dt.datetime.strptime(popage.xpath("//strong")[0].text,
                                    "%B %d, %Y @ %I:%M %p")
        who = popage.xpath("//h1")[0].text

        # One entry per ORD_INFO match; "what" keeps the whole stripped
        # text of the <div> the match was found in.
        related = []
        for item in popage.xpath("//div"):
            t = item.text
            if t is None:
                continue
            t = t.strip()
            for related_entity in ORD_INFO.findall(t):
                related.append({"ord_no": related_entity, "what": t})

        e = Event(name=who,
                  session=self.session,
                  when=when,
                  location='unknown')
        e.add_source(url)

        for o in related:
            i = e.add_agenda_item(o['what'])
            i.add_bill(o['ord_no'], note='consideration')

        yield e
def get_events(self):
    """Scrape the committee-events listing and yield one Event per entry.

    The request covers a date range beginning 10 days before the current
    UTC time and ending 30 days after that beginning. Each ``<li>`` in the
    ``committee-events`` list is expected to carry a ``<span>`` whose
    ``onclick`` attribute matches ``CLICK_INFO``; list items that do not
    match are skipped. Details (name, time, related bills) come from the
    pop-over page fetched with the captured ``info_id``.

    Raises:
        Exception: if ``self.session`` is not the current session.
    """
    if self.session != self.get_current_session():
        raise Exception("Can't do that, dude")

    window_start = dt.datetime.utcnow() - dt.timedelta(days=10)
    window_end = window_start + dt.timedelta(days=30)
    url = URL.format(**{
        "from": window_start.strftime("%Y/%m/%d"),
        "til": window_end.strftime("%Y/%m/%d"),
    })

    page = self.lxmlize(url)
    for entry in page.xpath("//ul[contains(@class, 'committee-events')]//li"):
        onclick = entry.xpath(".//span")[0].attrib['onclick']
        po = CLICK_INFO.match(onclick)
        if po is None:
            continue

        # The captured id selects the pop-over page with more details.
        popage = self.popOverUrl(po.groupdict()['info_id'])

        when = dt.datetime.strptime(
            popage.xpath("//strong")[0].text, "%B %d, %Y @ %I:%M %p")
        who = popage.xpath("//h1")[0].text

        # (ordinance number, full stripped <div> text) for every
        # ORD_INFO match found on the pop-over page.
        related = []
        for div in popage.xpath("//div"):
            text = div.text
            if text is None:
                continue
            text = text.strip()
            for ord_no in ORD_INFO.findall(text):
                related.append((ord_no, text))

        e = Event(name=who,
                  session=self.session,
                  when=when,
                  location='unknown')
        e.add_source(url)

        for ord_no, what in related:
            agenda_item = e.add_agenda_item(what)
            agenda_item.add_bill(ord_no, note='consideration')

        yield e
def get_events(self):
    """Yield an Event for every row of the TMMIS meeting schedule report.

    Report pages used:
    http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMeetingScheduleReport
    http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMemberAttendanceReport

    Per-member attendance CSVs are first downloaded into a temporary
    directory, then the meeting schedule CSV is written to ``meetings.csv``
    in the current working directory and read back row by row. Both the
    temporary directory and ``meetings.csv`` are removed when the generator
    finishes, is closed, or fails.
    """
    report_url = "http://app.toronto.ca/tmmis/getAdminReport.do"

    # scrape attendance
    tmpdir = tempfile.mkdtemp()
    try:
        page = lxmlize("http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMemberAttendanceReport")
        members = page.xpath('//td[@class="inputText"]/select[@name="memberId"]/option')
        for member in members:
            post = {
                'function': 'getMemberAttendanceReport',
                'download': 'csv',
                'exportPublishReportId': 1,
                'termId': 4,
                'memberId': member.attrib['value'],
                'decisionBodyId': 0,
            }
            r = requests.post(report_url, data=post)
            # Only responses served with the spreadsheet content type are
            # saved; anything else is skipped.
            if r.headers['content-type'] != 'application/vnd.ms-excel':
                continue

            with open(tmpdir + '/' + member.text + '.csv', 'w') as attendance_file:
                attendance_file.write(r.text)

        # scrape events
        post = {
            'function': 'getMeetingScheduleReport',
            'download': 'csv',
            'exportPublishReportId': 3,
            'termId': 4,
            'decisionBodyId': 0,
        }
        r = requests.post(report_url, data=post)

        with open('meetings.csv', 'w') as meeting_file:
            meeting_file.write(r.text)

        # NOTE(review): 'rb' is the Python 2 convention for csv.reader; on
        # Python 3 the reader needs a text-mode file -- confirm which
        # interpreter this scraper targets before changing the mode.
        with open('meetings.csv', 'rb') as csvfile:
            rows = csv.reader(csvfile, delimiter=',')
            next(rows)  # skip the header row

            committee = ''
            agenda_items = []

            for row in rows:
                name = row[0]
                when = dt.datetime.strptime(row[2], "%Y-%m-%d")
                location = row[5]

                # Agenda items are looked up once per run of consecutive
                # rows that share a committee name.
                if name != committee:
                    committee = name
                    agenda_items = find_items(committee)

                e = Event(name=name,
                          session=self.session,
                          when=when,
                          location=location)

                for attendee in find_attendees(tmpdir, row):
                    e.add_person(attendee)
                e.add_source("http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMeetingScheduleReport")

                for item in agenda_items:
                    if item['date'].date() != when.date():
                        continue
                    i = e.add_agenda_item(item['description'])
                    i.add_committee(committee)
                    i['order'] = item['order']

                    for link in item['links']:
                        i.add_media_link(link['name'], link['url'],
                                         on_duplicate='ignore')

                    if 'notes' in item:
                        i['notes'] = [item['notes']]

                yield e
    finally:
        # Clean up even when the consumer stops early or a request fails.
        shutil.rmtree(tmpdir)
        if os.path.exists('meetings.csv'):
            os.remove('meetings.csv')
def migrate_events(self, state):
    """Convert billy event records into Event objects and save them.

    Args:
        state: value matched against each record's ``state`` field; a
            falsy value selects every record in ``billy_db.events``.

    Records that end up with no sources are skipped and never saved.
    Participants for which no chamber can be found (under ``chamber``,
    ``_chamber`` or ``+chamber``) are dropped. Related bills whose
    ``bill_id`` is None are skipped.

    Raises:
        KeyError: if a participant's ``participant_type`` is not one of
            ``committee``, ``legislator`` or ``person``, or if a record
            lacks one of the required keys read with ``[...]``.
    """
    spec = {}
    if state:
        spec['state'] = state

    # Loop invariants, built once instead of once per record.
    # Keys handled explicitly below (or deliberately dropped); they are
    # not copied into ``extras``.
    blacklist = frozenset([
        "description", "when", "location", "session", "updated_at",
        "created_at", "end", "sources", "documents", "related_bills",
        "state", "+link", "link", "level", "participants", "country",
        "_all_ids", "type",
    ])
    typos = {
        "canceled": "cancelled"
    }
    participant_kinds = {
        "committee": "organization",
        "legislator": "person",
        "person": "person",
    }

    for entry in self.billy_db.events.find(spec, timeout=False):
        e = Event(
            name=entry['description'],
            when=entry['when'],
            location=entry['location'],
            session=entry['session'],
            updated_at=entry['updated_at'],
            created_at=entry['created_at'],
            type=entry['type'],
        )
        e.identifiers = [{'scheme': 'openstates',
                          'identifier': entry['_id']}]
        e._openstates_id = entry['_id']
        if entry.get('+location_url'):
            e.add_location_url(entry['+location_url'])

        link = entry.get('link', entry.get("+link"))
        if link:
            e.add_link(link, 'link')

        # Normalise known misspellings of the status value.
        status = entry.get('status')
        e.status = typos.get(status, status)

        # Every remaining non-empty, non-private key is kept as an extra.
        for key, value in entry.items():
            if key in blacklist or not value or key.startswith("_"):
                continue
            e.extras[key] = value

        if entry.get('end'):
            end = entry['end']
            try:
                end = dt.datetime.fromtimestamp(end)
            except TypeError:
                # Not a value fromtimestamp accepts; keep it as stored.
                pass
            e.end = end

        for source in entry['sources']:
            e.add_source(url=source['url'])

        if e.sources == []:
            continue  # XXX: print warning

        for document in entry.get('documents', []):
            # Try to add the mimetype. If it is missing, fall back to a
            # generic undeclared application/octet-stream.
            e.add_document(name=document.get('name'),
                           document_id=document.get('doc_id'),
                           url=document['url'],
                           mimetype=document.get(
                               "mimetype", document.get(
                                   "+mimetype",
                                   "application/octet-stream")))

        # A single agenda item, created on the first related bill, holds
        # every bill attached to this event.
        agenda = None
        for bill in entry.get('related_bills', []):
            if agenda is None:
                agenda = e.add_agenda_item(
                    description="Bills up for Consideration"
                )

            hcid = _hot_cache.get(bill.get('id', None), None)
            bid = bill['bill_id']
            if bid is None:
                continue

            agenda.add_bill(bill=bid, id=hcid)

        for who in entry.get('participants', []):
            participant_type = who.get('participant_type', 'committee')
            # I've gone through the backlog of OpenStates data, they are
            # all committees of some sort.

            who_chamber = who.get('chamber')
            if who_chamber is None:
                for chamber in ["_chamber", "+chamber"]:
                    f = who.get(chamber)
                    if f:
                        who_chamber = f
                        break

            if who_chamber is None:
                # Freak of nature ...
                continue

            hcid = _hot_cache.get(who.get('id', None), None)

            e.add_participant(
                name=who['participant'],
                type=participant_kinds[participant_type],
                id=hcid,
                note=who['type'],
                chamber=who_chamber)

        self.save_object(e)
def get_events(self):
    """Build and yield Events from the TMMIS meeting schedule report.

    Report pages used:
    http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMeetingScheduleReport
    http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMemberAttendanceReport

    Attendance CSVs (one per member option on the attendance report page)
    are saved into a temporary directory; the meeting schedule CSV is
    saved as ``meetings.csv`` in the current working directory and then
    parsed. The temporary directory and ``meetings.csv`` are deleted once
    the generator completes, is closed, or raises.
    """
    endpoint = "http://app.toronto.ca/tmmis/getAdminReport.do"

    # scrape attendance
    tmpdir = tempfile.mkdtemp()
    try:
        page = lxmlize(
            "http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMemberAttendanceReport"
        )
        members = page.xpath(
            '//td[@class="inputText"]/select[@name="memberId"]/option')
        for member in members:
            post = {
                'function': 'getMemberAttendanceReport',
                'download': 'csv',
                'exportPublishReportId': 1,
                'termId': 4,
                'memberId': member.attrib['value'],
                'decisionBodyId': 0,
            }
            r = requests.post(endpoint, data=post)
            # Responses without the spreadsheet content type are skipped.
            if r.headers['content-type'] != 'application/vnd.ms-excel':
                continue

            attendance_path = tmpdir + '/' + member.text + '.csv'
            with open(attendance_path, 'w') as attendance_file:
                attendance_file.write(r.text)

        # scrape events
        post = {
            'function': 'getMeetingScheduleReport',
            'download': 'csv',
            'exportPublishReportId': 3,
            'termId': 4,
            'decisionBodyId': 0,
        }
        r = requests.post(endpoint, data=post)

        with open('meetings.csv', 'w') as meeting_file:
            meeting_file.write(r.text)

        # NOTE(review): binary mode suits csv.reader on Python 2 only;
        # Python 3 requires a text-mode file here -- confirm the target
        # interpreter before changing it.
        with open('meetings.csv', 'rb') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            next(reader)  # discard the header row

            committee = ''
            agenda_items = []

            for row in reader:
                name = row[0]
                when = dt.datetime.strptime(row[2], "%Y-%m-%d")
                location = row[5]

                # Refresh the agenda items only when the committee name
                # differs from the previous row's.
                if name != committee:
                    committee = name
                    agenda_items = find_items(committee)

                e = Event(name=name,
                          session=self.session,
                          when=when,
                          location=location)

                for attendee in find_attendees(tmpdir, row):
                    e.add_person(attendee)
                e.add_source(
                    "http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMeetingScheduleReport"
                )

                for item in agenda_items:
                    if item['date'].date() != when.date():
                        continue
                    i = e.add_agenda_item(item['description'])
                    i.add_committee(committee)
                    i['order'] = item['order']

                    for link in item['links']:
                        i.add_media_link(link['name'],
                                         link['url'],
                                         on_duplicate='ignore')

                    if 'notes' in item:
                        i['notes'] = [item['notes']]

                yield e
    finally:
        # Runs on normal completion, early close, and errors alike.
        shutil.rmtree(tmpdir)
        if os.path.exists('meetings.csv'):
            os.remove('meetings.csv')