Пример #1
0
def test_basic_agenda():
    """Check that an event validates before and after gaining an agenda item."""
    event = Event(
        name="get-together",
        when=dt.datetime.utcnow(),
        location="Joe's Place",
    )
    event.add_source(url='foobar')

    # The event must already be valid with just a source attached.
    event.validate()

    item = event.add_agenda_item("foo bar")
    assert item

    # ... and must remain valid once the agenda item has been added.
    event.validate()
Пример #2
0
    def get_events(self):
        """Yield an ``Event`` for every usable entry of the 'committee-events' list.

        The listing URL is built for a window that starts 10 days before the
        current UTC time and ends 30 days after that start.  For each list
        item whose first ``<span>`` has an ``onclick`` attribute matching
        ``CLICK_INFO``, the pop-over page is fetched through
        ``self.popOverUrl`` and used to read:

        * the event time, from the first ``<strong>`` element,
        * the event name, from the first ``<h1>`` element,
        * ordinance numbers, from every ``ORD_INFO`` match in ``<div>`` text.

        Each ordinance number becomes an agenda item (described by the text
        of the ``<div>`` it was found in) with the ordinance attached as a
        bill under consideration.

        Raises:
            Exception: if ``self.session`` is not the current session.
        """
        if self.session != self.get_current_session():
            raise Exception("Can't do that, dude")

        start = dt.datetime.utcnow() - dt.timedelta(days=10)
        end = start + dt.timedelta(days=30)

        # "from" is a Python keyword, so the format arguments have to be
        # passed through an unpacked dict rather than as keyword arguments.
        url = URL.format(**{"from": start.strftime("%Y/%m/%d"),
                            "til": end.strftime("%Y/%m/%d")})

        page = self.lxmlize(url)
        events = page.xpath("//ul[contains(@class, 'committee-events')]//li")

        for event in events:
            # Items without a clickable span carry no pop-over id; skip them
            # the same way items with a non-matching handler are skipped.
            spans = event.xpath(".//span")
            if not spans:
                continue

            onclick = spans[0].attrib.get('onclick')
            if onclick is None:
                continue

            po = CLICK_INFO.match(onclick)
            if po is None:
                continue

            # The id is what the pop-over endpoint needs to return details.
            poid = po.groupdict()['info_id']

            popage = self.popOverUrl(poid)
            when = dt.datetime.strptime(popage.xpath("//strong")[0].text,
                                        "%B %d, %Y @ %I:%M %p")
            who = popage.xpath("//h1")[0].text
            related = []

            for item in popage.xpath("//div"):
                t = item.text
                if t is None:
                    continue

                t = t.strip()
                for related_entity in ORD_INFO.findall(t):
                    related.append({
                        "ord_no": related_entity,
                        "what": t
                    })

            e = Event(name=who,
                      session=self.session,
                      when=when,
                      location='unknown')
            e.add_source(url)

            for o in related:
                i = e.add_agenda_item(o['what'])
                i.add_bill(o['ord_no'], note='consideration')

            yield e
Пример #3
0
    def get_events(self):
        """Generate ``Event`` objects from the 'committee-events' listing.

        A listing URL is formatted for the period beginning 10 days before
        the current UTC time and ending 30 days after that beginning.  Each
        ``<li>`` of the listing is inspected: when its first ``<span>`` has
        an ``onclick`` value matching ``CLICK_INFO``, the captured
        ``info_id`` is passed to ``self.popOverUrl`` and the returned page
        supplies the event time (first ``<strong>``), the event name (first
        ``<h1>``) and any ordinance numbers (``ORD_INFO`` matches inside
        ``<div>`` text).  Every ordinance number found is added as an agenda
        item with a bill noted as ``'consideration'``.

        Raises:
            Exception: if ``self.session`` is not the current session.
        """
        if self.session != self.get_current_session():
            raise Exception("Can't do that, dude")

        start = dt.datetime.utcnow() - dt.timedelta(days=10)
        end = start + dt.timedelta(days=30)

        # The placeholder is named "from" (a keyword), hence the dict unpacking.
        url = URL.format(**{
            "from": start.strftime("%Y/%m/%d"),
            "til": end.strftime("%Y/%m/%d")
        })

        page = self.lxmlize(url)
        events = page.xpath("//ul[contains(@class, 'committee-events')]//li")

        for event in events:
            # A list item with no span, or a span with no onclick handler,
            # cannot be resolved to a pop-over page, so it is skipped just
            # like an item whose handler does not match CLICK_INFO.
            spans = event.xpath(".//span")
            onclick = spans[0].attrib.get('onclick') if spans else None
            if onclick is None:
                continue

            po = CLICK_INFO.match(onclick)
            if po is None:
                continue

            # This id is used to request the detailed pop-over page.
            poid = po.groupdict()['info_id']

            popage = self.popOverUrl(poid)
            when = dt.datetime.strptime(
                popage.xpath("//strong")[0].text, "%B %d, %Y @ %I:%M %p")
            who = popage.xpath("//h1")[0].text
            related = []

            for item in popage.xpath("//div"):
                t = item.text
                if t is None:
                    continue

                t = t.strip()
                for related_entity in ORD_INFO.findall(t):
                    related.append({"ord_no": related_entity, "what": t})

            e = Event(name=who,
                      session=self.session,
                      when=when,
                      location='unknown')
            e.add_source(url)

            for o in related:
                i = e.add_agenda_item(o['what'])
                i.add_bill(o['ord_no'], note='consideration')

            yield e
Пример #4
0
  def get_events(self):
    """Yield one ``Event`` per data row of the meeting schedule report.

    Reports used:
      http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMeetingScheduleReport
      http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMemberAttendanceReport

    One attendance CSV per member is downloaded into a temporary directory
    and the meeting schedule is written to ``meetings.csv`` in the current
    working directory.  Both are removed when the generator is exhausted,
    closed early, or interrupted by an exception.

    For every schedule row an event is built from columns 0 (name), 2 (date,
    ``%Y-%m-%d``) and 5 (location); attendees come from ``find_attendees``
    and agenda items from ``find_items`` for the row's committee, restricted
    to items dated the same day as the meeting.
    """
    tmpdir = tempfile.mkdtemp()
    try:
      # scrape attendance
      page = lxmlize("http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMemberAttendanceReport")
      members = page.xpath('//td[@class="inputText"]/select[@name="memberId"]/option')
      for member in members:
        post = {
          'function': 'getMemberAttendanceReport',
          'download': 'csv',
          'exportPublishReportId': 1,
          'termId': 4,
          'memberId': member.attrib['value'],
          'decisionBodyId': 0,
        }
        r = requests.post("http://app.toronto.ca/tmmis/getAdminReport.do", data=post)
        # Anything that is not served as a spreadsheet is not a report; skip it.
        if r.headers['content-type'] != 'application/vnd.ms-excel':
          continue

        with open(tmpdir + '/' + member.text + '.csv', 'w') as attendance_file:
          attendance_file.write(r.text)

      # scrape events
      post = {
        'function': 'getMeetingScheduleReport',
        'download': 'csv',
        'exportPublishReportId': 3,
        'termId': 4,
        'decisionBodyId': 0,
      }

      r = requests.post("http://app.toronto.ca/tmmis/getAdminReport.do", data=post)

      with open('meetings.csv', 'w') as meeting_file:
        meeting_file.write(r.text)

      # NOTE(review): binary mode is what the Python 2 csv module expects;
      # Python 3's csv.reader needs a text-mode file -- confirm the target
      # interpreter before changing the mode.
      with open('meetings.csv', 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader)  # skip the first row (presumably the column headers)

        committee = ''
        agenda_items = []

        for row in reader:
          name = row[0]
          when = dt.datetime.strptime(row[2], "%Y-%m-%d")
          location = row[5]

          # Agenda items are only looked up again when the committee changes
          # from one row to the next.
          if name != committee:
            committee = name
            agenda_items = find_items(committee)

          e = Event(name=name,
                    session=self.session,
                    when=when,
                    location=location
                    )

          # Look the attendees up once per row and reuse the result.
          for attendee in find_attendees(tmpdir, row):
            e.add_person(attendee)
          e.add_source("http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMeetingScheduleReport")

          for item in agenda_items:
            if item['date'].date() == when.date():
              i = e.add_agenda_item(item['description'])
              i.add_committee(committee)
              i['order'] = item['order']

              for link in item['links']:
                i.add_media_link(link['name'], link['url'], on_duplicate='ignore')

              if 'notes' in item:
                i['notes'] = [item['notes']]

          yield e
    finally:
      # Runs on normal exhaustion, on generator close and on errors, so the
      # downloaded files never outlive the scrape.
      shutil.rmtree(tmpdir)
      if os.path.exists('meetings.csv'):
        os.remove('meetings.csv')
Пример #5
0
    def migrate_events(self, state):
        """Convert billy event records into ``Event`` objects and save them.

        Args:
            state: when truthy, only records whose ``state`` field equals
                this value are queried; otherwise all records are.

        For each record the core fields, identifiers, links, status, extras,
        end time, sources, documents, related bills and participants are
        copied onto a new ``Event`` which is then passed to
        ``self.save_object``.  Records that end up with no sources are
        skipped, as are participants for which no chamber can be found.
        """
        spec = {}
        if state:
            spec['state'] = state

        # Loop-invariant lookup tables, built once instead of once per record.
        # Keys listed here are handled explicitly below (or deliberately
        # dropped) and therefore must not be copied into ``extras``.
        blacklist = frozenset([
            "description", "when", "location", "session",
            "updated_at", "created_at", "end", "sources",
            "documents", "related_bills", "state", "+link",
            "link", "level", "participants", "country",
            "_all_ids", "type",
        ])
        typos = {
            "canceled": "cancelled"
        }
        participant_types = {
            "committee": "organization",
            "legislator": "person",
            "person": "person",
        }

        for entry in self.billy_db.events.find(spec, timeout=False):

            e = Event(
                name=entry['description'],
                when=entry['when'],
                location=entry['location'],
                session=entry['session'],
                updated_at=entry['updated_at'],
                created_at=entry['created_at'],
                type=entry['type'],
            )
            e.identifiers = [{'scheme': 'openstates',
                             'identifier': entry['_id']}]
            e._openstates_id = entry['_id']
            if entry.get('+location_url'):
                e.add_location_url(entry['+location_url'])

            link = entry.get('link', entry.get("+link"))
            if link:
                e.add_link(link, 'link')

            # Normalise known misspellings of the status value.
            status = entry.get('status')
            e.status = typos.get(status, status)

            for key, value in entry.items():
                if key in blacklist or not value or key.startswith("_"):
                    continue
                e.extras[key] = value

            if entry.get('end'):
                end = entry['end']
                try:
                    end = dt.datetime.fromtimestamp(end)
                except TypeError:
                    # Not a numeric timestamp (presumably already a
                    # datetime): keep the stored value unchanged.
                    pass

                e.end = end

            for source in entry['sources']:
                e.add_source(url=source['url'])

            if e.sources == []:
                continue  # XXX: print warning

            for document in entry.get('documents', []):
                # Use the declared mimetype ("mimetype", then "+mimetype");
                # if neither is present fall back to a generic undeclared
                # application/octet-stream.
                e.add_document(name=document.get('name'),
                               document_id=document.get('doc_id'),
                               url=document['url'],
                               mimetype=document.get(
                                   "mimetype", document.get(
                                       "+mimetype",
                                       "application/octet-stream")))

            agenda = None
            for bill in entry.get('related_bills', []):
                bid = bill['bill_id']
                if bid is None:
                    continue

                # Create the agenda item lazily, and only once there is an
                # actual bill to attach, so no empty item is ever added.
                if agenda is None:
                    agenda = e.add_agenda_item(
                        description="Bills up for Consideration"
                    )

                hcid = _hot_cache.get(bill.get('id', None), None)
                agenda.add_bill(bill=bid, id=hcid)

            for who in entry.get('participants', []):
                participant_type = who.get('participant_type', 'committee')
                # I've gone through the backlog of OpenStates data, they are
                # all committees of some sort.

                who_chamber = who.get('chamber')
                if who_chamber is None:
                    # Fall back to the alternative spellings of the key.
                    for chamber in ["_chamber", "+chamber"]:
                        f = who.get(chamber)
                        if f:
                            who_chamber = f
                            break

                if who_chamber is None:
                    # Freak of nature ...
                    continue

                hcid = _hot_cache.get(who.get('id', None), None)

                e.add_participant(
                    name=who['participant'],
                    type=participant_types[participant_type],
                    id=hcid,
                    note=who['type'],
                    chamber=who_chamber)

            self.save_object(e)
Пример #6
0
    def get_events(self):
        """Yield one ``Event`` per data row of the meeting schedule report.

        Reports used:
          http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMeetingScheduleReport
          http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMemberAttendanceReport

        One attendance CSV per member is downloaded into a temporary
        directory and the meeting schedule is written to ``meetings.csv`` in
        the current working directory.  Both are removed when the generator
        is exhausted, closed early, or interrupted by an exception.

        For every schedule row an event is built from columns 0 (name),
        2 (date, ``%Y-%m-%d``) and 5 (location); attendees come from
        ``find_attendees`` and agenda items from ``find_items`` for the row's
        committee, restricted to items dated the same day as the meeting.
        """
        tmpdir = tempfile.mkdtemp()
        try:
            # scrape attendance
            page = lxmlize(
                "http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMemberAttendanceReport"
            )
            members = page.xpath(
                '//td[@class="inputText"]/select[@name="memberId"]/option')
            for member in members:
                post = {
                    'function': 'getMemberAttendanceReport',
                    'download': 'csv',
                    'exportPublishReportId': 1,
                    'termId': 4,
                    'memberId': member.attrib['value'],
                    'decisionBodyId': 0,
                }
                r = requests.post(
                    "http://app.toronto.ca/tmmis/getAdminReport.do",
                    data=post)
                # Responses not served as a spreadsheet are not reports.
                if r.headers['content-type'] != 'application/vnd.ms-excel':
                    continue

                attendance_path = tmpdir + '/' + member.text + '.csv'
                with open(attendance_path, 'w') as attendance_file:
                    attendance_file.write(r.text)

            # scrape events
            post = {
                'function': 'getMeetingScheduleReport',
                'download': 'csv',
                'exportPublishReportId': 3,
                'termId': 4,
                'decisionBodyId': 0,
            }

            r = requests.post("http://app.toronto.ca/tmmis/getAdminReport.do",
                              data=post)

            with open('meetings.csv', 'w') as meeting_file:
                meeting_file.write(r.text)

            # NOTE(review): binary mode is what the Python 2 csv module
            # expects; Python 3's csv.reader needs a text-mode file --
            # confirm the target interpreter before changing the mode.
            with open('meetings.csv', 'rb') as csvfile:
                reader = csv.reader(csvfile, delimiter=',')
                next(reader)  # skip the first row (presumably the headers)

                committee = ''
                agenda_items = []

                for row in reader:
                    name = row[0]
                    when = dt.datetime.strptime(row[2], "%Y-%m-%d")
                    location = row[5]

                    # Agenda items are only looked up again when the
                    # committee changes from one row to the next.
                    if name != committee:
                        committee = name
                        agenda_items = find_items(committee)

                    e = Event(name=name,
                              session=self.session,
                              when=when,
                              location=location)

                    # Look the attendees up once per row and reuse the result.
                    for attendee in find_attendees(tmpdir, row):
                        e.add_person(attendee)
                    e.add_source(
                        "http://app.toronto.ca/tmmis/getAdminReport.do?function=prepareMeetingScheduleReport"
                    )

                    for item in agenda_items:
                        if item['date'].date() == when.date():
                            i = e.add_agenda_item(item['description'])
                            i.add_committee(committee)
                            i['order'] = item['order']

                            for link in item['links']:
                                i.add_media_link(link['name'],
                                                 link['url'],
                                                 on_duplicate='ignore')

                            if 'notes' in item:
                                i['notes'] = [item['notes']]

                    yield e
        finally:
            # Runs on normal exhaustion, on generator close and on errors, so
            # the downloaded files never outlive the scrape.
            shutil.rmtree(tmpdir)
            if os.path.exists('meetings.csv'):
                os.remove('meetings.csv')