Пример #1
0
    def scrape(self):
        """Yield a Person per commissioner, then every committee Organization.

        Iterates ``self.councilMembers()``.  Each member gets a
        ``Commissioner`` term in the ``legislature`` plus e-mail, web site
        and source details when present.  A committee Organization is created
        once per distinct department name containing "committee"; all of them
        are yielded after the people.
        """
        committee_d = {}

        for councilman, committees in self.councilMembers():

            p = Person(' '.join((councilman['First name'], councilman['Last name'])))
            # Some people are special-cased by name: one is skipped entirely
            # and three get a hard-coded district number.
            if p.name == 'Toni Preckwinkle':
                continue
            elif p.name == 'Robert Steele':
                district = 2
            elif p.name == 'Jerry Butler':
                district = 3
            elif p.name == 'Sean Morrison':
                district = 17
            else:
                # District is the first run of digits in the person's URL.
                # Raw string: a bare '\d' is an invalid string escape sequence.
                district = re.findall(r'\d+', councilman['Person Name']['url'])[0]

            start_date = self.toTime(councilman['Start Date']).date()
            end_date = self.toTime(councilman['End Date']).date()

            # An end date of 2018-12-02 is stored as "no end date"
            # (presumably the end of the then-current term -- TODO confirm).
            if end_date == datetime.date(2018, 12, 2):
                end_date = ''
            else:
                end_date = end_date.isoformat()

            p.add_term('Commissioner', 'legislature',
                       district='District {}'.format(district),
                       start_date=start_date.isoformat(),
                       end_date=end_date)

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['url'],
                                     note='E-mail')

            if councilman['Web site']:
                p.add_link(councilman['Web site']['url'], note='web site')

            p.add_source(councilman['Person Name']['url'])

            for committee, _, _ in committees:
                committee_name = committee['Department Name']['label']

                if 'committee' in committee_name.lower():
                    # Reuse the Organization if this committee was already seen.
                    o = committee_d.get(committee_name)
                    if o is None:
                        o = Organization(committee_name,
                                         classification='committee',
                                         parent_id={'name': 'Cook County Board of Commissioners'})
                        o.add_source(committee['Department Name']['url'])
                        committee_d[committee_name] = o

                    membership = o.add_member(p, role=committee["Title"])
                    membership.start_date = self.mdY2Ymd(committee["Start Date"])
            yield p

        # Committees are emitted only after every person has been processed,
        # so each one carries its complete member list.
        for o in committee_d.values():
            yield o
Пример #2
0
def test_person_add_term():
    """add_term should attach a valid related object with the given org and dates."""
    person = Person('Eternal')
    person.add_term('eternal', 'council', start_date='0001', end_date='9999')

    # The term is expected to be the first related object on the person.
    term = person._related[0]
    term.validate()

    expected_org = {
        'classification': 'council',
    }
    assert get_pseudo_id(term.organization_id) == expected_org
    assert term.start_date == '0001'
    assert term.end_date == '9999'
Пример #3
0
    def scrape(self):
        """Yield a Person for each historical legislator with a recent term.

        Reads ``congress-legislators/legislators-historical.yaml`` located
        next to this file.  A legislator is skipped when every one of their
        terms has an ``end`` value that compares (as a string) below
        ``'1970'``.  Each remaining legislator gets one term per entry,
        their parties, all identifiers and a fixed source URL.
        """
        yaml_path = Path(__file__).parent / 'congress-legislators/legislators-historical.yaml'

        # NOTE(review): CLoader is not the safe loader; acceptable only
        # because the file is a trusted checkout -- confirm before reusing
        # this on external input.
        with yaml_path.open() as handle:
            legislators = yaml.load(handle, Loader=yaml.CLoader)

        for legislator in legislators:
            terms = legislator['terms']
            if all(term['end'] < '1970' for term in terms):
                continue

            full_name = ' '.join(
                (legislator['name']['first'], legislator['name']['last']))
            person = Person(name=full_name,
                            birth_date=legislator['bio'].get('birthday', ''),
                            gender=legislator['bio']['gender'])

            parties = set()
            for term in terms:
                state = term['state']
                parties.add(term['party'])

                # Anything that is not a 'rep' term is treated as a Senate term.
                if term['type'] != 'rep':
                    role, chamber = "Senator", 'upper'
                    district_name = "{state}, Class {klass}".format(
                        state=state, klass=term['class'])
                else:
                    role, chamber = 'Representative', 'lower'
                    district_name = self._district_name(state, term['district'])

                person.add_term(role,
                                chamber,
                                district=district_name,
                                start_date=term['start'],
                                end_date=term['end'])

            for party in parties:
                person.add_party(party)

            for scheme, identifier in legislator['id'].items():
                person.add_identifier(str(identifier), scheme=scheme)

            person.add_source(
                'https://github.com/unitedstates/congress-legislators/blob/master/legislators-historical.yaml'
            )

            yield person
Пример #4
0
    def get_organizations(self):
        """Yield the city council, the city (with its posts) and one Person.

        Order of yielded objects: the "Mountain View City Council"
        legislature, the "City of Mountain View" executive organization with
        its Mayor / City Manager / City Clerk posts, then a Person holding a
        Mayor term.
        """
        # The legislature itself; no posts are attached to it here.
        yield Organization(name="Mountain View City Council",
                           classification="legislature")

        # Executive posts all share the same division; label and role are
        # identical for each of them.
        division = 'ocd-division/country:us/state:ca/place:mountainview'
        city = Organization('City of Mountain View',
                            classification='executive')
        for post in ('Mayor', 'City Manager', 'City Clerk'):
            city.add_post(post, post, division_id=division)

        yield city

        mayor = Person(name="Abe-Koga, Margaret")
        mayor.add_term('Mayor',
                       'executive',
                       start_date=datetime.date(1989, 4, 24),
                       end_date=datetime.date(2011, 5, 16),
                       appointment=True)
        mayor.add_source('https://mountainview.legistar.com/People.aspx')
        yield mayor
Пример #5
0
    def scrape(self):
        """Yield a Person for each distinct council member found on ward pages.

        Fetches the city-council landing page, follows every linked URL
        containing ``'ward'`` and reads the member name and the page title
        from each.  The title is split on ``'-'``: the part before the dash
        becomes the district, the first word after it becomes the role.
        A member whose name was already seen is not yielded again.
        """
        council_url = 'https://www.stpaul.gov/departments/city-council'
        council = requests.get(council_url)
        base = html.fromstring(council.text)
        base.make_links_absolute(council_url)
        hrefs = base.xpath('.//*[@class="field-item even"]/p/a/@href')
        # dict.fromkeys de-duplicates while keeping document order, so the
        # scrape visits pages in a stable order (set() order varies per run).
        ward_links = [href for href in dict.fromkeys(hrefs) if 'ward' in href]

        seen_names = set()
        for link in ward_links:
            page = html.fromstring(requests.get(link).text)
            block = page.xpath(
                './/*[@class="well well--blue well--big-padding block-content"]'
            )[0]
            name_parts = block.xpath('.//p/a/text()')[0].split(' ')
            title = page.xpath('.//*[@id="page-title"]/text()')[0]

            # Three-part names: drop the middle element.
            if len(name_parts) == 3:
                name_parts.pop(1)
            name = ' '.join(name_parts)

            if name in seen_names:
                continue
            seen_names.add(name)

            title_parts = title.split('-')
            ward = title_parts[0].strip()
            # Strip before splitting so a space after the dash does not
            # produce an empty role.
            role = title_parts[1].strip().split(' ')[0]

            member = Person(name=name, role=role)
            member.add_source(link)
            member.add_term(role,
                            'legislature',
                            org_name='Saint Paul City Council',
                            district=ward)
            yield member
Пример #6
0
    def get_organizations(self):
        """Yield the Common Council, the City of Milwaukee and one Person.

        The council gets fifteen "District N" Alderman posts (N = 1..15); the
        city gets a single Mayor post; the Person holds a Mayor term with a
        start date and no end date.
        """
        council = Organization(name="Common Council", classification="legislature")
        district_division = (
            'ocd-division/country:us/state:wi/place:milwaukee/council_district:{}'
        )
        for number in range(1, 16):
            council.add_post("District {}".format(number),
                             "Alderman",
                             division_id=district_division.format(number))

        yield council

        city = Organization("City of Milwaukee", classification='executive')
        city.add_post(
            'Mayor',
            'Mayor',
            division_id='ocd-division/country:us/state:wi/place:milwaukee')

        yield city

        mayor = Person(name="Barrett, Tom")
        mayor.add_term('Mayor',
                       'executive',
                       start_date=datetime.date(2004, 4, 15),
                       appointment=True)
        mayor.add_source('https://milwaukee.legistar.com/People.aspx')
        yield mayor
Пример #7
0
    def scrape(self):
        """Yield board members, committee organizations and adjunct members.

        1. Office records of the body named "Board of Directors" are grouped
           by ``OfficeRecordFullName``; each person gets one term per record
           and is yielded.
        2. Every body whose type is ``Committee`` becomes an Organization.
           Board members are attached with ``o.add_member``; people who are
           not board members get their own Person with a membership.
        3. Those non-board ("adjunct") people are yielded last.
        """
        body_types = self.body_types()

        # Exactly one matching body is expected; unpacking fails otherwise.
        board_of_directors, = [
            body for body in self.bodies()
            if body['BodyName'] == 'Board of Directors'
        ]

        members = {}
        for office in self.body_offices(board_of_directors):
            members.setdefault(office['OfficeRecordFullName'],
                               []).append(office)

        legistar_api = self.BASE_URL + '/OfficeRecords/'

        for member, offices in members.items():
            p = Person(member)
            for term in offices:
                # Any title other than 'non-voting member' counts as a
                # voting board member.
                if term['OfficeRecordTitle'] != 'non-voting member':
                    role = 'Board Member'
                    post = VOTING_POSTS.get(member)
                else:
                    role = 'Nonvoting Board Member'
                    post = NONVOTING_POSTS.get(member)

                # Dates must come from this term's own record, not from the
                # `office` variable left over from the grouping loop above.
                p.add_term(role,
                           'legislature',
                           district=post,
                           start_date=self.toDate(
                               term['OfficeRecordStartDate']),
                           end_date=self.toDate(term['OfficeRecordEndDate']))

            p.add_source(legistar_api, note='api')

            yield p

        adjunct_members = {}

        for body in self.bodies():
            if body['BodyTypeId'] != body_types['Committee']:
                continue

            o = Organization(body['BodyName'],
                             classification='committee',
                             parent_id={'name': 'Board of Directors'})

            o.add_source(self.BASE_URL + '/Bodies/')

            for office in self.body_offices(body):
                role = office['OfficeRecordTitle']
                if role not in ("Chair", "Vice Chair"):
                    role = 'Member'

                person = office['OfficeRecordFullName']
                start_date = self.toDate(office['OfficeRecordStartDate'])
                end_date = self.toDate(office['OfficeRecordEndDate'])

                if person in members:
                    # Board members were already yielded; reference them by name.
                    o.add_member(person,
                                 role,
                                 start_date=start_date,
                                 end_date=end_date)
                    continue

                p = adjunct_members.get(person)
                if p is None:
                    p = Person(person)
                    # NOTE(review): 'foo' is a placeholder source -- replace
                    # with a real URL once one is known.
                    p.add_source('foo')
                    adjunct_members[person] = p

                p.add_membership(body['BodyName'],
                                 role=role,
                                 start_date=start_date,
                                 end_date=end_date)

            yield o

        for p in adjunct_members.values():
            yield p
Пример #8
0
    def scrape(self):
        """Yield committee Organizations, then every Person seen.

        Web-scraped member details (from ``LegistarPersonScraper``) are
        combined with API office records of the body named "City Council".
        Each council office record becomes a term on the person; party,
        photo, contact details, links and notes are taken from the web data
        when present.  Bodies of the selected committee types (plus two
        bodies matched by name) become Organizations, and each of their
        office records becomes a membership on the corresponding Person.

        Raises:
            AssertionError: if the names from the web and from the API
                do not match exactly.
        """
        web_scraper = LegistarPersonScraper(
            requests_per_minute=self.requests_per_minute)
        web_scraper.MEMBERLIST = 'http://legistar.council.nyc.gov/DepartmentDetail.aspx?ID=6897&GUID=CDC6E691-8A8C-4F25-97CB-86F31EDAB081&Mode=MainBody'

        if self.cache_storage:
            web_scraper.cache_storage = self.cache_storage

        if self.requests_per_minute == 0:
            web_scraper.cache_write_only = False

        web_info = {}

        for member, _ in web_scraper.councilMembers():
            name = member['Person Name']['label'].strip()
            web_info[name] = member

        city_council, = [
            body for body in self.bodies()
            if body['BodyName'] == 'City Council'
        ]

        terms = collections.defaultdict(list)

        public_advocates = {  # Match casing to Bill De Blasio as council member
            'The Public Advocate (Mr. de Blasio)': 'Bill De Blasio',
            'The Public Advocate (Ms. James)': 'Letitia James',
        }

        for office in self.body_offices(city_council):
            name = office['OfficeRecordFullName']
            name = public_advocates.get(name, name).strip()

            terms[name].append(office)

            # Add past members (and public advocates): they have no web
            # entry, so every web field reads as None for them.
            if name not in web_info:
                web_info[name] = collections.defaultdict(lambda: None)

        # Check that we have everyone we expect, formatted consistently, in
        # both information arrays. For instance, this will fail if we forget to
        # strip trailing spaces from names on one side or the other (which has
        # the effect of omitting information, such as post, from the scrape).

        assert set(web_info.keys()) == set(terms.keys())

        # Web field name -> (contact detail type, note).  Built once; it does
        # not depend on the member or term being processed.
        contact_types = {
            "City Hall Office": ("address", "City Hall Office"),
            "City Hall Phone": ("voice", "City Hall Phone"),
            "Ward Office Phone": ("voice", "Ward Office Phone"),
            "Ward Office Address": ("address", "Ward Office Address"),
            "Fax": ("fax", "Fax")
        }

        members = {}

        for member, offices in terms.items():

            p = Person(member)

            web = web_info[member]

            for term in offices:
                role = term['OfficeRecordTitle']

                if role == 'Public Advocate':
                    role = 'Non-Voting Council Member'
                else:
                    role = 'Council Member'

                district = web.get('District', '').replace(' 0', ' ')

                p.add_term(role,
                           'legislature',
                           district=district,
                           start_date=self.toDate(
                               term['OfficeRecordStartDate']),
                           end_date=self.toDate(term['OfficeRecordEndDate']))

                party = web.get('Political Party')

                if party == 'Democrat':
                    party = 'Democratic'

                if party:
                    p.add_party(party)

                if web.get('Photo'):
                    p.image = web['Photo']

                for contact_type, (type_, _note) in contact_types.items():
                    # Index the mapping; the previous web(contact_type) call
                    # raised TypeError as soon as a contact field was present.
                    if web.get(contact_type) and web[contact_type] != 'N/A':
                        p.add_contact_detail(type=type_,
                                             value=web[contact_type],
                                             note=_note)

                if web.get('E-mail'):
                    p.add_contact_detail(type="email",
                                         value=web['E-mail']['url'],
                                         note='E-mail')

                if web.get('Web site'):
                    p.add_link(web['Web site']['url'], note='web site')

                if web.get('Notes'):
                    p.extras = {'Notes': web['Notes']}

                if not p.sources:  # Only add sources once
                    source_urls = self.person_sources_from_office(term)
                    person_api_url, person_web_url = source_urls
                    p.add_source(person_api_url, note='api')
                    p.add_source(person_web_url, note='web')

            members[member] = p

        committee_types = [
            'Committee', 'Inactive Committee', 'Select Committee',
            'Subcommittee', 'Task Force', 'Land Use'
        ]  # Committee on Land Use

        body_types = {
            k: v
            for k, v in self.body_types().items() if k in committee_types
        }

        for body in self.bodies():
            if body['BodyTypeName'] in body_types \
                or body['BodyName'] in ('Legislative Documents Unit',
                                        'Legal and Government Affairs Division'):

                # Skip typo in API data
                if body['BodyName'] == 'Committee on Mental Health, Developmental Disability, Alcoholism, Substance Abuse amd Disability Services':
                    continue

                parent_org = PARENT_ORGS.get(body['BodyName'],
                                             'New York City Council')

                body_name = body['BodyName']

                o = Organization(body_name,
                                 classification='committee',
                                 parent_id={'name': parent_org})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body),
                             note='api')
                o.add_source(
                    self.WEB_URL +
                    '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.
                    format(**body),
                    note='web')

                for office in self.body_offices(body):
                    # Possible roles: 'Council Member', 'MEMBER', 'Ex-Officio',
                    # 'Committee Member', None, 'CHAIRPERSON'

                    role = office['OfficeRecordTitle']

                    if role and role.lower() == 'chairperson':
                        role = 'Chairperson'
                    else:
                        role = 'Member'

                    person = office['OfficeRecordFullName']
                    person = public_advocates.get(person, person).strip()

                    if person in members:
                        p = members[person]
                    else:
                        # Committee-only person: create and register them so
                        # later committees reuse the same object.
                        p = Person(person)

                        source_urls = self.person_sources_from_office(office)
                        person_api_url, person_web_url = source_urls
                        p.add_source(person_api_url, note='api')
                        p.add_source(person_web_url, note='web')

                        members[person] = p

                    p.add_membership(o,
                                     role=role,
                                     start_date=self.toDate(
                                         office['OfficeRecordStartDate']),
                                     end_date=self.toDate(
                                         office['OfficeRecordEndDate']))

                yield o

        for p in members.values():
            yield p
Пример #9
0
    def get_organizations(self):
        """Yield the city, the council, the mayor and calendar-derived committees.

        After the fixed organizations and the mayor, the city calendar page
        is fetched for each entry of the module-level ``date_range``.  Every
        listed meeting whose detail page has a ``node-content clearfix`` div
        and whose title mentions none of 'City Council', 'Legislative' or
        'Holiday' yields a committee Organization, once per distinct cleaned
        name.
        """
        global date_range

        place = 'ocd-division/country:us/state:mn/place:st_paul'
        city = Organization('City of Saint Paul', classification='executive')
        for post in ('Mayor', 'City Clerk'):
            city.add_post(post, post, division_id=place)
        yield city

        council = Organization(name="Saint Paul City Council",
                               classification="legislature",
                               parent_id=city)
        for ward in range(1, 8):
            council.add_post(
                "Ward {}".format(ward),
                "Councilmember",
                division_id='ocd-division/country:us/state:mn/place:st_paul/ward:{}'.format(ward))

        yield council

        mayor = Person(name="Melvin Carter")
        mayor.add_term('Mayor',
                       'executive',
                       start_date=dtdate(2018, 1, 19),
                       appointment=True)

        mayor.add_source('http://www.google.com')
        yield mayor

        committees = []
        seen_names = []
        excluded_words = ('City Council', 'Legislative', 'Holiday')
        for date in date_range:
            print('Checking date:', date)
            response = requests.get("https://www.stpaul.gov/calendar/" + date)
            page = html.fromstring(response.text)

            # Collect every calendar row that carries a date.
            meetings = []
            for item in page.xpath('.//*/div[@class="view-content"]/div'):
                dates = item.xpath(
                    './/*/span[@class="date-display-single"]/text()')
                if not dates:
                    continue
                meetings.append({
                    'date': dates[0],
                    'info': item.xpath(
                        './/*/span[@class="field-content"]/a/text()')[0],
                    'link': item.xpath(
                        './/*/span[@class="field-content"]/a/@href')[0],
                })

            for meeting in meetings:
                meeting['link'] = "https://www.stpaul.gov" + meeting['link']
                detail = html.fromstring(requests.get(meeting['link']).text)
                if not detail.xpath('.//div[@class="node-content clearfix"]'):
                    continue

                info = meeting['info']
                if any(word in info for word in excluded_words):
                    continue

                cleaned = info.replace('Meeting', '')
                cleaned = cleaned.replace(' - Cancelled', '')
                cleaned = cleaned.replace('Events', '')
                meeting['name'] = cleaned.strip()
                if meeting['name'] not in seen_names:
                    seen_names.append(meeting['name'])
                    committees.append(meeting)

        print('Creating organizations')
        for meeting in committees:
            print(meeting)
            committee = Organization(name=meeting['name'],
                                     classification='committee',
                                     parent_id=city)
            committee.add_source(meeting['link'])
            yield committee
Пример #10
0
    def scrape(self):
        '''
        Scrape the web to create a dict with all active organizations.
        Then, we can access the correct URL for the organization detail page.

        Afterwards, office records of the body named
        "Board of Directors - Regular Board Meeting" are grouped per person
        and turned into terms, every body of type ``Committee`` is yielded as
        an Organization, and finally every Person (board and committee
        members alike) is yielded.
        '''
        web_scraper = LegistarPersonScraper(
            requests_per_minute=self.requests_per_minute)
        web_scraper.MEMBERLIST = 'https://metro.legistar.com/People.aspx'
        web_info = {}

        for _, organizations in web_scraper.councilMembers():
            for organization, _, _ in organizations:
                organization_name = organization['Department Name'][
                    'label'].strip()
                organization_info = organization['Department Name']

                web_info[organization_name] = organization_info

        body_types = self.body_types()

        # Exactly one matching body is expected; unpacking fails otherwise.
        board_of_directors, = [
            body for body in self.bodies()
            if body['BodyName'] == 'Board of Directors - Regular Board Meeting'
        ]
        board_of_directors["BodyName"] = "Board of Directors"

        terms = collections.defaultdict(list)
        for office in self.body_offices(board_of_directors):
            terms[office['OfficeRecordFullName']].append(office)

        members = {}
        for member, offices in terms.items():
            p = Person(member)
            for term in offices:
                role = term['OfficeRecordTitle']

                # Any title other than the two plain member titles is
                # recorded as its own appointed term ...
                if role not in {'Board Member', 'non-voting member'}:
                    p.add_term(
                        role,
                        'legislature',
                        start_date=self.toDate(term['OfficeRecordStartDate']),
                        end_date=self.toDate(term['OfficeRecordEndDate']),
                        appointment=True)
                # ... and everyone except the CEO also gets a board
                # membership term with a post.
                if role != 'Chief Executive Officer':
                    if role == 'non-voting member':
                        member_type = 'Nonvoting Board Member'
                        post = NONVOTING_POSTS.get(member)
                    else:
                        member_type = 'Board Member'
                        post = VOTING_POSTS.get(member)

                    start_date = self.toDate(term['OfficeRecordStartDate'])
                    end_date = self.toDate(term['OfficeRecordEndDate'])
                    board_membership = p.add_term(member_type,
                                                  'legislature',
                                                  district=post,
                                                  start_date=start_date,
                                                  end_date=end_date)

                    # NOTE(review): assumes end_date is comparable with the
                    # configured acting end date -- confirm toDate never
                    # returns None here.
                    acting_member_end_date = ACTING_MEMBERS_WITH_END_DATE.get(
                        p.name)
                    if acting_member_end_date and acting_member_end_date <= end_date:
                        board_membership.extras = {'acting': 'true'}

            # Sources are derived from the last office record of this person.
            source_urls = self.person_sources_from_office(term)
            person_api_url, person_web_url = source_urls
            p.add_source(person_api_url, note='api')
            p.add_source(person_web_url, note='web')

            members[member] = p

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Committee']:
                organization_name = body['BodyName'].strip()
                o = Organization(organization_name,
                                 classification='committee',
                                 parent_id={'name': 'Board of Directors'})

                # Fall back to the departments listing when the committee
                # was not seen on the web.  The fallback is already an
                # absolute URL, so it must not be prefixed with WEB_URL.
                organization_info = web_info.get(organization_name, {})
                organization_url = organization_info.get(
                    'url', 'https://metro.legistar.com/Departments.aspx')

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body),
                             note='api')
                o.add_source(organization_url, note='web')

                for office in self.body_offices(body):
                    role = office['OfficeRecordTitle']

                    if role not in ("Chair", "Vice Chair",
                                    "Chief Executive Officer"):
                        if role == 'non-voting member':
                            role = 'Nonvoting Member'
                        else:
                            role = 'Member'

                    person = office['OfficeRecordFullName']

                    if person in members:
                        p = members[person]
                    else:
                        # Committee-only person: create and register them so
                        # later committees reuse the same object.
                        p = Person(person)

                        source_urls = self.person_sources_from_office(office)
                        person_api_url, person_web_url = source_urls
                        p.add_source(person_api_url, note='api')
                        p.add_source(person_web_url, note='web')

                        members[person] = p

                    start_date = self.toDate(office['OfficeRecordStartDate'])
                    end_date = self.toDate(office['OfficeRecordEndDate'])
                    membership = p.add_membership(organization_name,
                                                  role=role,
                                                  start_date=start_date,
                                                  end_date=end_date)

                    acting_member_end_date = ACTING_MEMBERS_WITH_END_DATE.get(
                        p.name)
                    if acting_member_end_date and acting_member_end_date <= end_date:
                        membership.extras = {'acting': 'true'}

                yield o

        for p in members.values():
            yield p
Пример #11
0
    def scrape(self):
        """Yield a Person per council member URL, then committee Organizations.

        Entries from ``self.councilMembers()`` are grouped by the person's
        URL (entries without a URL are ignored).  Consecutive service spans
        are merged, one 'Council Member' term is added per merged span, and
        party, photo, e-mail, web site, notes and source are taken from the
        last entry of the group.  Committees are created once per name;
        organizations whose name contains 'Committee' are yielded before
        those containing 'Subcommittee', followed by two hard-coded
        organizations.
        """
        # Department names containing "committee" that must not become
        # committee organizations.
        noncommittees = {'Committee of the Whole'}
        committee_d = {}

        # person URL -> ([entries for that person], committees of the first entry)
        people_d = {}

        for councilman, committees in self.councilMembers() :

            
            if 'url' in councilman['Person Name'] :
                councilman_url = councilman['Person Name']['url']

                if councilman_url in people_d :
                    # Only the entry is appended; this entry's committees
                    # are not merged into the stored ones.
                    people_d[councilman_url][0].append(councilman) 
                else :
                    people_d[councilman_url] = [councilman], committees

        for person_entries, committees in people_d.values() :

            # Personal details below come from the last entry of the group.
            councilman = person_entries[-1]
            
            p = Person(councilman['Person Name']['label'])
            
            if p.name == 'Letitia James' :
                p.name = 'Letitia Ms. James'
                p.add_name('Letitia James')

            spans = [(self.toTime(entry['Start Date']).date(), 
                      self.toTime(entry['End Date']).date(),
                      entry['District'])
                     for entry in person_entries]

            # Merge spans: a span that starts exactly one day after the
            # previous one ended, in the same district, extends the current
            # span instead of opening a new one.
            merged_spans = []
            last_end_date = None
            last_district = None
            for start_date, end_date, district in sorted(spans) :
                if last_end_date is None :
                    # First span: open it.
                    span = [start_date, end_date, district]
                elif (start_date - last_end_date) == datetime.timedelta(1) and district == last_district :
                    span[1] = end_date
                else :
                    merged_spans.append(span)
                    span = [start_date, end_date, district]

                last_end_date = end_date
                last_district = district

            # Flush the span still open after the loop (person_entries always
            # holds at least one entry, so `span` is bound here).
            merged_spans.append(span)

            for start_date, end_date, district in merged_spans :
                # NOTE(review): this overwrites the span's own district with
                # the district of the last entry, so every term gets the same
                # district -- confirm this is intended.
                district = councilman['District'].replace(' 0', ' ')
                # An end date of 2017-12-31 is stored as "no end date"
                # (presumably the end of the then-current term -- TODO confirm).
                if end_date == datetime.date(2017, 12, 31) :
                    end_date = ''
                else :
                    end_date = end_date.isoformat()
                # NOTE(review): looks like leftover debug output.
                print(start_date, end_date)
                p.add_term('Council Member', 'legislature', 
                           district=district, 
                           start_date=start_date.isoformat(),
                           end_date=end_date)

            party = councilman['Political Party']
            if party == 'Democrat' :
                party = 'Democratic'
            
            if party :
                p.add_party(party)

            if councilman['Photo'] :
                p.image = councilman['Photo']

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['url'],
                                     note='E-mail')

            if councilman['Web site']:
                p.add_link(councilman['Web site']['url'], note='web site')

            p.extras = {'Notes' : councilman['Notes']}
                 
            p.add_source(councilman['Person Name']['url'], note='web')

            for committee, _, _ in committees:
                committee_name = committee['Department Name']['label']
                if committee_name not in noncommittees and 'committee' in committee_name.lower():
                    # Reuse the Organization if this committee was already seen.
                    o = committee_d.get(committee_name, None)
                    if o is None:
                        parent_id = PARENT_ORGS.get(committee_name,
                                                    'New York City Council')
                        o = Organization(committee_name,
                                         classification='committee',
                                         parent_id={'name' : parent_id})
                        o.add_source(committee['Department Name']['url'])
                        committee_d[committee_name] = o

                    membership = o.add_member(p, role=committee["Title"])
                    membership.start_date = self.mdY2Ymd(committee["Start Date"])
            yield p
            

        # Two passes over the same dict: names containing 'Committee' first,
        # then names containing 'Subcommittee'.  A name containing both
        # substrings would be yielded twice.
        for o in committee_d.values() :
            if 'Committee' in o.name :
                yield o

        for o in committee_d.values() :
            if 'Subcommittee' in o.name :
                yield o

        # Two organizations that are always emitted with fixed names,
        # independent of the scraped data.
        o = Organization('Committee on Mental Health, Developmental Disability, Alcoholism, Drug Abuse and Disability Services',
                         classification='committee',
                         parent_id={'name' : 'New York City Council'})
        o.add_source("http://legistar.council.nyc.gov/Departments.aspx")

        yield o

        o = Organization('Subcommittee on Drug Abuse',
                         classification='committee',
                         parent_id={'name' : 'Committee on Mental Health, Developmental Disability, Alcoholism, Drug Abuse and Disability Services'})
        o.add_source("http://legistar.council.nyc.gov/Departments.aspx")

        yield o
Пример #12
0
    def get_organizations(self):
        """Yield Chicago's council, its executive body, and known officials.

        The "Chicago City Council" legislature is yielded first with one
        "Alderman" post per ward (wards 1 through 50).  The "City of Chicago"
        executive organization follows with its "Mayor" and "City Clerk"
        posts.  Finally one Person is yielded for every hard-coded mayor and
        city clerk, each carrying an appointed executive term.
        """
        city_division = 'ocd-division/country:us/state:il/place:chicago'
        people_url = 'https://chicago.legistar.com/People.aspx'

        council = Organization(name="Chicago City Council", classification="legislature")
        for ward in range(1, 51):
            council.add_post(
                "Ward {}".format(ward),
                "Alderman",
                division_id=city_division + '/ward:{}'.format(ward))
        yield council

        executive = Organization('City of Chicago', classification='executive')
        for post in ('Mayor', 'City Clerk'):
            executive.add_post(post, post, division_id=city_division)
        yield executive

        # (name, post, term start, term end); an end of None means that no
        # end_date is passed to add_term at all.
        officials = (
            ("Daley, Richard M.", 'Mayor',
             datetime.date(1989, 4, 24), datetime.date(2011, 5, 16)),
            ("Emanuel, Rahm", 'Mayor',
             datetime.date(2011, 5, 16), None),
            ('Mendoza, Susana A.', 'City Clerk',
             datetime.date(2011, 5, 16), datetime.date(2016, 12, 4)),
            ('Del Valle, Miguel', 'City Clerk',
             datetime.date(2006, 12, 1), datetime.date(2011, 5, 16)),
            ('Valencia, Anna M.', 'City Clerk',
             datetime.date(2017, 1, 25), datetime.date(2019, 5, 20)),
        )

        for full_name, post, began, ended in officials:
            term_dates = {'start_date': began}
            if ended is not None:
                term_dates['end_date'] = ended

            official = Person(name=full_name)
            official.add_term(post, 'executive', appointment=True, **term_dates)
            official.add_source(people_url)
            yield official
Пример #13
0
    def scrape(self):
        """Yield Sacramento committees and commissions, then every person.

        Council members are built from the offices of the 'City Council '
        body, one term per office record.  Each standing committee is yielded
        as an Organization after its office holders have been given a
        membership; people who only appear on committees are created on the
        fly.  Boards and commissions are yielded without memberships, and all
        collected people are yielded last.
        """
        body_types = self.body_types()

        # The body name is matched including its trailing space; the
        # single-element unpacking fails loudly unless exactly one body matches.
        city_council, = [body for body in self.bodies()
                         if body['BodyName'] == 'City Council ']

        terms = collections.defaultdict(list)

        for office in self.body_offices(city_council):
            # Strip the name so these keys agree with the stripped names used
            # for the committee lookups below; an unstripped key would make a
            # padded name produce a second Person for the same council member.
            full_name = office['OfficeRecordFullName'].strip()

            if full_name != "Granicus BA":
                terms[full_name].append(office)

        members = {}

        for member, offices in terms.items():

            p = Person(member)
            for term in offices:
                role = term['OfficeRecordTitle']
                p.add_term(role,
                           'legislature',
                           start_date=self.toDate(term['OfficeRecordStartDate']),
                           end_date=self.toDate(term['OfficeRecordEndDate']))

            # Source links are derived from the person's last office record
            # (every entry in `terms` holds at least one office).
            source_urls = self.person_sources_from_office(offices[-1])
            person_api_url, person_web_url = source_urls
            p.add_source(person_api_url, note='api')
            p.add_source(person_web_url, note='web')

            members[member] = p

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Standing Committees']:
                o = Organization(body['BodyName'],
                                 classification='committee',
                                 parent_id={'name': 'Sacramento City Council'})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
                o.add_source(self.WEB_URL + '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body),
                             note='web')

                for office in self.body_offices(body):
                    # messed up record for joanna thompson
                    if office['OfficeRecordId'] == 1055:
                        continue

                    # Any title other than the two leadership titles is
                    # recorded as a plain membership.
                    role = office['OfficeRecordTitle']
                    if role not in ("Vice Chair", "Chairperson"):
                        role = 'Member'

                    person = office['OfficeRecordFullName'].strip()
                    if person in members:
                        p = members[person]
                    else:
                        # Committee-only person: create and source them here.
                        p = Person(person)

                        source_urls = self.person_sources_from_office(office)
                        person_api_url, person_web_url = source_urls
                        p.add_source(person_api_url, note='api')
                        p.add_source(person_web_url, note='web')

                        members[person] = p

                    p.add_membership(body['BodyName'],
                                     role=role,
                                     start_date=self.toDate(office['OfficeRecordStartDate']),
                                     end_date=self.toDate(office['OfficeRecordEndDate']))

                yield o

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Boards or Commission']:
                o = Organization(body['BodyName'],
                                 classification='commission',
                                 parent_id={'name': 'Sacramento City Council'})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
                o.add_source(self.WEB_URL + '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body),
                             note='web')

                yield o

        # People are yielded last, once all memberships have been attached.
        for p in members.values():
            yield p
Пример #14
0
    def scrape(self):
        """Yield a Person for each Minneapolis board and council member.

        Three web pages are scraped in turn: the school board site (one
        "Director" term per member in 'Minneapolis School Board'), the city
        council page (one "Councilmember" term per ward in 'Minneapolis City
        Council'), and the park board commissioners page (one "Commissioner"
        term per post in 'Minneapolis Parks and Recreation').
        """

        # School Board
        school = 'http://board.mpls.k12.mn.us/'
        body = requests.get(school)
        base = html.fromstring(body.text)
        base.make_links_absolute(school)
        # De-duplicate the matched summary elements before visiting them.
        summaries = list(set(base.xpath('.//*/div[@class="summary"]')))
        for summary in summaries:
            b = {}
            b['term'] = summary.xpath('.//span/p/text()')[-1].replace(
                '\r\n\t', '').replace('\xa0', '').replace('|', '').strip()
            b['district'] = summary.xpath('.//*/a/text()')[-1]
            link = summary.xpath('.//*/@href')[0]

            # Each member has a detail page holding role, e-mail and name.
            member_base = requests.get(link)
            member_base = html.fromstring(member_base.text)
            member_base.make_links_absolute(school)
            text = member_base.xpath('.//*/span/p/text()')
            text = [mb.strip() for mb in text]
            text = [t for t in text if len(t) > 0]
            b['role'] = text[0].split(',')[1]
            b['email'] = member_base.xpath('.//*/span/p/a/@href')[0]
            b['name'] = member_base.xpath('.//*/div/span/text()')[1]
            try:
                b['headshot'] = member_base.xpath('.//*/div/a/@href')[1]
            except IndexError:
                # The headshot is optional: fewer than two matching links
                # simply means the page has no photo.
                pass
            member = Person(name=b['name'], role=b['role'])
            member.add_source(url=school)
            member.add_term('Director',
                            'legislature',
                            org_name='Minneapolis School Board',
                            district=b['district'])
            yield member

        # City Council

        council = 'http://www.minneapolismn.gov/council/'
        body = requests.get(council)
        base = html.fromstring(body.text)
        base.make_links_absolute(council)
        wards = base.xpath('.//*/ul[@id="cname"]/li')
        for w in wards:
            i = {}
            link = w.xpath('.//a/@href')[0]
            # The link text is split on '-' into ward (before) and name (after).
            text = w.xpath('.//a/text()')[0]
            i['link'] = link
            i['ward'] = text.split('-')[0].strip()
            i['name'] = text.split('-')[1].strip()
            member = Person(name=i['name'], role='Council Member')
            member.add_source(link)
            member.add_term('Councilmember',
                            'legislature',
                            org_name='Minneapolis City Council',
                            district=i['ward'])
            yield member

        # Park and Rec Board
        parks = 'https://www.minneapolisparks.org/about_us/leadership_and_structure/commissioners/'
        body = requests.get(parks)
        base = html.fromstring(body.text)
        base.make_links_absolute(parks)
        member_base = base.xpath('.//*/div[@class="col-12"]/div/div/a')
        for mb in member_base:
            m = {}
            m['name'] = mb.xpath('.//h3/text()')[0]
            m['link'] = mb.xpath('.//@href')[0]
            m['headshot'] = mb.xpath('.//img/@src')[0]
            post_base = mb.xpath('.//p/span/text()')[0]
            m['post'] = post_base.replace('Commissioner', '').strip()
            # A comma separates the post from an additional role; without
            # one the person is a plain commissioner.
            if ',' in m['post']:
                m['role'] = m['post'].split(',')[1]
                m['post'] = m['post'].split(',')[0]
            else:
                m['role'] = 'Commissioner'
            member = Person(name=m['name'], role=m['role'])
            member.add_source(url=parks)
            member.add_term('Commissioner',
                            'legislature',
                            org_name='Minneapolis Parks and Recreation',
                            district=m['post'])
            yield member
Пример #15
0
    def scrape(self):
        '''
        Yield LA Metro committees and every person holding an office.

        The Legistar web interface is scraped first to build a mapping from
        organization name to its 'Department Name' info, so that each
        committee can be sourced to its web detail page.  Board members are
        then built from the offices of the 'Board of Directors - Regular
        Board Meeting' body, committees are yielded with their memberships
        attached to the people, and all collected people are yielded last.
        '''
        web_scraper = LegistarPersonScraper(
            requests_per_minute=self.requests_per_minute)
        web_scraper.MEMBERLIST = 'https://metro.legistar.com/People.aspx'
        web_info = {}

        for _, organizations in web_scraper.councilMembers():
            for organization, _, _ in organizations:
                organization_name = organization['Department Name'][
                    'label'].strip()
                organization_info = organization['Department Name']

                web_info[organization_name] = organization_info

        body_types = self.body_types()

        # Single-element unpacking fails loudly unless exactly one body matches.
        board_of_directors, = [
            body for body in self.bodies()
            if body['BodyName'] == 'Board of Directors - Regular Board Meeting'
        ]
        board_of_directors["BodyName"] = "Board of Directors"

        terms = collections.defaultdict(list)
        for office in self.body_offices(board_of_directors):
            terms[office['OfficeRecordFullName']].append(office)

        members = {}
        for member, offices in terms.items():
            p = Person(member)

            for term in offices:
                role = term['OfficeRecordTitle']

                # Titles other than plain (non)voting membership get a term
                # of their own, recorded as an appointment.
                if role not in {'Board Member', 'non-voting member'}:
                    p.add_term(
                        role,
                        'legislature',
                        start_date=self.toDate(term['OfficeRecordStartDate']),
                        end_date=self.toDate(term['OfficeRecordEndDate']),
                        appointment=True)

                # Everyone except the CEO also gets a board-membership term
                # attached to their post.
                if role != 'Chief Executive Officer':
                    if role == 'non-voting member':
                        member_type = 'Nonvoting Board Member'
                        post = NONVOTING_POSTS.get(member)
                    else:
                        member_type = 'Board Member'
                        post = VOTING_POSTS.get(member)

                    start_date = self.toDate(term['OfficeRecordStartDate'])
                    end_date = self.toDate(term['OfficeRecordEndDate'])
                    board_membership = p.add_term(member_type,
                                                  'legislature',
                                                  district=post,
                                                  start_date=start_date,
                                                  end_date=end_date)

                    acting_member_end_date = ACTING_MEMBERS_WITH_END_DATE.get(
                        p.name)

                    if acting_member_end_date and acting_member_end_date <= end_date:
                        board_membership.extras = {'acting': 'true'}

            # Each term contains first and last names. This should be the same
            # across all of a person's terms, so go ahead and grab them from the
            # last term in the array.
            p.family_name = term['OfficeRecordLastName']
            p.given_name = term['OfficeRecordFirstName']

            # Defensively assert that the given and family names match the
            # expected value.
            if member == 'Hilda L. Solis':
                # Given/family name does not contain middle initial.
                assert p.given_name == 'Hilda' and p.family_name == 'Solis'
            else:
                assert member == ' '.join([p.given_name, p.family_name])

            source_urls = self.person_sources_from_office(term)
            person_api_url, person_web_url = source_urls

            p.add_source(person_api_url, note='api')
            p.add_source(person_web_url, note='web')

            members[member] = p

        for body in self.bodies():
            if body['BodyTypeId'] in (
                    body_types['Committee'],
                    body_types['Independent Taxpayer Oversight Committee']):
                organization_name = body['BodyName'].strip()
                o = Organization(organization_name,
                                 classification='committee',
                                 parent_id={'name': 'Board of Directors'})

                # Fall back to the departments listing when the web scrape
                # did not see this organization.  The fallback is already an
                # absolute URL, so it must not be prefixed with WEB_URL.
                organization_info = web_info.get(organization_name, {})
                organization_url = organization_info.get(
                    'url', 'https://metro.legistar.com/Departments.aspx')

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body),
                             note='api')
                o.add_source(organization_url, note='web')

                for office in self.body_offices(body):
                    role = office['OfficeRecordTitle']

                    # Titles outside BOARD_OFFICE_ROLES collapse to a plain
                    # voting or nonvoting membership.
                    if role not in BOARD_OFFICE_ROLES:
                        if role == 'non-voting member':
                            role = 'Nonvoting Member'
                        else:
                            role = 'Member'

                    person = office['OfficeRecordFullName']

                    if person in members:
                        p = members[person]
                    else:
                        # Committee-only person: create and source them here.
                        p = Person(person)

                        source_urls = self.person_sources_from_office(office)
                        person_api_url, person_web_url = source_urls
                        p.add_source(person_api_url, note='api')
                        p.add_source(person_web_url, note='web')

                        members[person] = p

                    start_date = self.toDate(office['OfficeRecordStartDate'])
                    end_date = self.toDate(office['OfficeRecordEndDate'])
                    membership = p.add_membership(organization_name,
                                                  role=role,
                                                  start_date=start_date,
                                                  end_date=end_date)

                    acting_member_end_date = ACTING_MEMBERS_WITH_END_DATE.get(
                        p.name)
                    if acting_member_end_date and acting_member_end_date <= end_date:
                        membership.extras = {'acting': 'true'}

                yield o

        # People are yielded last, once all memberships have been attached.
        for p in members.values():
            yield p
Пример #16
0
    def get_organizations(self):
        """Yield Pittsburgh's organizations followed by hard-coded people.

        In order: the "Pittsburgh City Council" legislature with a
        "Councilmember" post for each of districts 1 through 9, the
        "Standing Committee" committee, the "Mayor" executive, the
        "City Clerk" department, two city clerks, and finally a placeholder
        person named "All Members".
        """
        city_division = "ocd-division/country:us/state:pa/place:pittsburgh"

        council = Organization(name="Pittsburgh City Council", classification="legislature")
        for district in range(1, 10):
            council.add_post(
                "District {}".format(district),
                "Councilmember",
                division_id="{}/council_district:{}".format(city_division, district))
        yield council

        committee = Organization(name="Standing Committee", classification="committee")
        committee.add_source("http://pittsburghpa.gov/council/standing-committees", note="web")
        yield committee

        mayor_office = Organization(name="Mayor", classification="executive")
        mayor_office.add_post("Mayor", "Mayor", division_id=city_division)
        mayor_office.add_source("http://pittsburghpa.gov/mayor/index.html", note="web")
        yield mayor_office

        # TODO: figure out disambiguation for councilman/mayor positions
        # (using birth_date?).  Until then no mayors are yielded; the pending
        # entries are William Peduto (born 1964-10-30, mayor from 2014-01-06,
        # source http://pittsburghpa.gov/mayor/mayor-profile) and Luke
        # Ravenstahl (born 1980-02-06, mayor 2006-09-01 to 2014-01-06, source
        # https://www.post-gazette.com/local/city/2006/09/01/Ravenstahl-sworn-in-as-Pittsburgh-mayor/stories/200609010229).

        clerk_office = Organization(name="City Clerk", classification="department")
        clerk_office.add_post("City Clerk", "City Clerk", division_id=city_division)
        clerk_office.add_source("http://pittsburghpa.gov/clerk/", note="web")
        yield clerk_office

        # (name, role, organization classification, add_term keywords, source)
        people = (
            ("Brenda Pree", "City Clerk", "department",
             {"start_date": datetime.date(2017, 8, 29),
              "appointment": True},
             "http://pittsburghpa.gov/clerk/clerk-bio"),
            ("Mary Beth Doheny", "City Clerk", "department",
             {"start_date": datetime.date(2014, 3, 18),
              "end_date": datetime.date(2017, 8, 28),
              "appointment": True},
             "http://pittsburghpa.gov"),
            # "All Members", frustratingly, has a Person entry in Pittsburgh
            # Legistar, so the import trips without this. Going strong since 1816!
            ("All Members", "City Council", "legislature",
             {"start_date": datetime.date(1816, 3, 18)},
             "http://pittsburghpa.gov/council/index.html"),
        )

        for full_name, role, org_classification, term_kwargs, source_url in people:
            person = Person(name=full_name)
            person.add_term(role, org_classification, **term_kwargs)
            person.add_source(source_url)
            yield person
Пример #17
0
    def scrape(self):
        """Yield a Person for each legislator returned by the Open States API.

        Two bulk GraphQL queries (100 people each) are issued against
        alpha.openstates.org.  Names in the shared ``rep_names`` list that the
        first bulk query did not return are then fetched with one extra query
        per name.  For every person, memberships whose parent organization is
        'Minnesota Legislature' become terms; other memberships with a parent
        become plain memberships.
        """
        url = 'http://alpha.openstates.org/graphql'
        scrapers = [
            {
                'query':
                '{ people(memberOf:"ocd-organization/e91db6f8-2232-49cd-91af-fdb5adb4ac3b", first: 100) { edges { node { name party: currentMemberships(classification:"party") { organization { name }} links { url } sources { url } chamber: currentMemberships(classification:["upper", "lower"]) { post { label } organization { name classification parent { name }}}}}}}'
            },
            {
                'query':
                '{ people(memberOf:"ocd-organization/6a026144-758d-4d57-b856-9c60dce3c4b5", first: 100) { edges { node { name party: currentMemberships(classification:"party") { organization { name }} links { url } sources { url } chamber: currentMemberships(classification:["upper", "lower"]) { post { label } organization { name classification parent { name }}}}}}}'
            },
        ]

        base = requests.get(url=url, json=scrapers[0])
        base = base.json()
        ppl = base['data']['people']['edges']

        # Work on a copy: pruning the shared rep_names list in place would
        # leave it altered for any later use, including a second scrape() call.
        missing = list(rep_names)
        for p in ppl:
            p = p['node']
            if p['name'] in missing:
                missing.remove(p['name'])

        # Get names unretrieved from primary House API Query
        print('REP NAMES: ', missing)
        # This name is never queried individually; guard the removal so it
        # does not raise ValueError when the name is already gone.
        if 'Gene Pelowski' in missing:
            missing.remove('Gene Pelowski')

        for rep in missing:
            query = '{ people(memberOf:"ocd-organization/e91db6f8-2232-49cd-91af-fdb5adb4ac3b", first: 100, name: "' + rep + '") { edges { node { name party: currentMemberships(classification:"party") { organization { name }} links { url } sources { url } chamber: currentMemberships(classification:["upper", "lower"]) { post { label } organization { name classification parent { name }}}}}}}'
            query = {'query': query}
            scrapers.append(query)
        for s in scrapers:
            base = requests.get(url=url, json=s)
            base = base.json()
            print(base)
            ppl = base['data']['people']['edges']
            for p in ppl:
                p = p['node']
                orgs = p['chamber']
                rep = Person(name=p['name'], role='State Representative')
                for o in orgs:
                    ppr(o)
                    name = o['organization']['name']
                    classification = o['organization']['classification']
                    if o['organization']['parent']:
                        pname = o['organization']['parent']['name']
                        if pname == 'Minnesota Legislature':
                            label = o['post']['label']
                            if 'House' in name:
                                role = 'State Representative'
                            elif 'Senate' in name:
                                role = 'State Senator'
                            else:
                                # Neither chamber matched: skip rather than
                                # reuse a role left over from an earlier
                                # membership (or hit an unbound name).
                                continue
                            rep.add_term(role,
                                         classification,
                                         district=label,
                                         org_name=name)
                            rep.add_source(p['sources'][0]['url'])

                        else:
                            rep.add_membership(name)
                            rep.add_source(p['sources'][0]['url'])
                yield rep
Пример #18
0
    def scrape(self):
        """Yield Chicago committees, joint committees, and then every person.

        Aldermen are built from the offices of the 'City Council' body (vacant
        seats are skipped) and enriched with ward, photo, contact details and
        website taken from the Legistar web interface.  Each committee is
        yielded after its office holders have been given a membership; joint
        committees are yielded without memberships.  All collected people are
        yielded last.
        """
        body_types = self.body_types()

        # Single-element unpacking fails loudly unless exactly one body matches.
        city_council, = [body for body in self.bodies()
                         if body['BodyName'] == 'City Council']

        terms = collections.defaultdict(list)
        for office in self.body_offices(city_council):
            if 'vacan' not in office['OfficeRecordFullName'].lower():
                terms[office['OfficeRecordFullName'].strip()].append(office)

        web_scraper = LegistarPersonScraper(requests_per_minute = self.requests_per_minute)
        web_scraper.MEMBERLIST = 'https://chicago.legistar.com/DepartmentDetail.aspx?ID=12357&GUID=4B24D5A9-FED0-4015-9154-6BFFFB2A8CB4&R=8bcbe788-98cd-4040-9086-b34fa8e49881'
        web_scraper.ALL_MEMBERS = '3:3'

        if self.cache_storage:
            web_scraper.cache_storage = self.cache_storage

        if self.requests_per_minute == 0:
            web_scraper.cache_write_only = False

        web_info = {}
        for member, _ in web_scraper.councilMembers({'ctl00$ContentPlaceHolder$lstName' : 'City Council'}):
            web_info[member['Person Name']['label']] = member

        # These two people get hand-written entries; every field other than
        # the ward reads as None thanks to the defaultdict.
        web_info['Balcer, James'] = collections.defaultdict(lambda : None)
        web_info['Fioretti, Bob'] = collections.defaultdict(lambda : None)
        web_info['Balcer, James']['Ward/Office'] = 11
        web_info['Fioretti, Bob']['Ward/Office'] = 2

        # Web field name -> (contact detail type, note); the same for every
        # member, so it is built once outside the loop.
        contact_types = {
            "City Hall Address": ("address", "City Hall Address"),
            "City Hall Phone": ("voice", "City Hall Phone"),
            "Ward Office Phone": ("voice", "Ward Office Phone"),
            "Ward Office Address": ("address", "Ward Office Address"),
            "Fax": ("fax", "Fax")
        }

        members = {}
        for member, offices in terms.items():
            web = web_info[member]
            p = Person(member)
            for term in offices:
                p.add_term('Alderman',
                           'legislature',
                           district = "Ward {}".format(int(web['Ward/Office'])),
                           start_date = self.toDate(term['OfficeRecordStartDate']),
                           end_date = self.toDate(term['OfficeRecordEndDate']))

            if web.get('Photo'):
                p.image = web['Photo']

            for contact_type, (type_, _note) in contact_types.items():
                if web[contact_type] and web[contact_type] != 'N/A':
                    p.add_contact_detail(type=type_,
                                         value= web[contact_type],
                                         note=_note)

            if web["E-mail"] and web["E-mail"]["label"] and web["E-mail"]["label"] != 'N/A':
                p.add_contact_detail(type="email",
                                     value=web['E-mail']['label'],
                                     note='E-mail')

            if web['Website']:
                p.add_link(web['Website']['url'])

            # Source links are derived from the last office record seen in
            # the loop above.
            source_urls = self.person_sources_from_office(term)
            person_api_url, person_web_url = source_urls
            p.add_source(person_api_url, note='api')
            p.add_source(person_web_url, note='web')

            members[member] = p

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Committee']:
                o = Organization(body['BodyName'],
                                 classification='committee',
                                 parent_id={'name' : 'Chicago City Council'})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
                o.add_source(self.WEB_URL + '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body), note='web')

                for office in self.body_offices(body):
                    # messed up record for joanna thompson
                    if office['OfficeRecordId'] == 1055:
                        continue

                    # Any title other than the two leadership titles is
                    # recorded as a plain membership.
                    role = office['OfficeRecordTitle']
                    if role not in ("Vice Chair", "Chairman"):
                        role = 'Member'

                    person = office['OfficeRecordFullName'].strip()
                    if person in members:
                        p = members[person]
                    else:
                        # Committee-only person: create and source them here.
                        p = Person(person)

                        source_urls = self.person_sources_from_office(office)
                        person_api_url, person_web_url = source_urls
                        p.add_source(person_api_url, note='api')
                        p.add_source(person_web_url, note='web')

                        members[person] = p

                    # A TypeError from toDate is treated as "no end date".
                    try:
                        end_date = self.toDate(office['OfficeRecordEndDate'])
                    except TypeError:
                        end_date = ''
                    p.add_membership(body['BodyName'],
                                     role=role,
                                     start_date=self.toDate(office['OfficeRecordStartDate']),
                                     end_date=end_date)

                yield o

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Joint Committee']:
                o = Organization(body['BodyName'],
                                 classification='committee',
                                 parent_id={'name' : 'Chicago City Council'})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
                o.add_source(self.WEB_URL + '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body), note='web')

                yield o

        # People are yielded last, once all memberships have been attached.
        for p in members.values():
            yield p
Пример #19
0
    def scrape(self):
        """Yield the council's committees, commissions and people.

        Office records of the 'City Council ' body are grouped per person and
        turned into one ``Person`` with a term for every office held. Bodies
        typed 'Standing Committees' become committee ``Organization``s whose
        office records are attached to people as memberships. Bodies typed
        'Boards or Commission' are yielded as commissions with no
        memberships. People are yielded last, after every membership has been
        attached to them.
        """
        body_types = self.body_types()

        # Single-element unpacking: raises ValueError unless exactly one
        # body carries this name (the trailing space is part of the name
        # being matched).
        city_council, = [
            body for body in self.bodies()
            if body['BodyName'] == 'City Council '
        ]

        terms = collections.defaultdict(list)

        for office in self.body_offices(city_council):
            # Key by the stripped name: committee office records are looked
            # up with a stripped name below, so an unstripped key would never
            # match and the person would be created twice.
            name = office['OfficeRecordFullName'].strip()
            if name != "Granicus BA":
                terms[name].append(office)

        members = {}

        for member, offices in terms.items():

            p = Person(member)
            for term in offices:
                p.add_term(
                    term['OfficeRecordTitle'],
                    'legislature',
                    start_date=self.toDate(term['OfficeRecordStartDate']),
                    end_date=self.toDate(term['OfficeRecordEndDate']))

            # `term` is the last office record of this person; `offices` is
            # never empty because entries only exist once a record was
            # appended.
            source_urls = self.person_sources_from_office(term)
            person_api_url, person_web_url = source_urls
            p.add_source(person_api_url, note='api')
            p.add_source(person_web_url, note='web')

            members[member] = p

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Standing Committees']:
                o = Organization(body['BodyName'],
                                 classification='committee',
                                 parent_id={'name': 'Sacramento City Council'})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body),
                             note='api')
                o.add_source(
                    self.WEB_URL +
                    '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.
                    format(**body),
                    note='web')

                for office in self.body_offices(body):
                    # messed up record for joanna thompson
                    # NOTE(review): confirm this record id really belongs to
                    # this jurisdiction's data.
                    if office['OfficeRecordId'] == 1055:
                        continue

                    # Only the two leadership titles are kept verbatim.
                    role = office['OfficeRecordTitle']
                    if role not in ("Vice Chair", "Chairperson"):
                        role = 'Member'

                    person = office['OfficeRecordFullName'].strip()
                    if person in members:
                        p = members[person]
                    else:
                        # Committee member who never held a council office.
                        p = Person(person)

                        source_urls = self.person_sources_from_office(office)
                        person_api_url, person_web_url = source_urls
                        p.add_source(person_api_url, note='api')
                        p.add_source(person_web_url, note='web')

                        members[person] = p

                    p.add_membership(body['BodyName'],
                                     role=role,
                                     start_date=self.toDate(
                                         office['OfficeRecordStartDate']),
                                     end_date=self.toDate(
                                         office['OfficeRecordEndDate']))

                yield o

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Boards or Commission']:
                o = Organization(body['BodyName'],
                                 classification='commission',
                                 parent_id={'name': 'Sacramento City Council'})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body),
                             note='api')
                o.add_source(
                    self.WEB_URL +
                    '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.
                    format(**body),
                    note='web')

                yield o

        for p in members.values():
            yield p
Пример #20
0
    def get_organizations(self):
        """Yield Minneapolis organizations, the mayor, and council committees.

        Yields, in order: the city (executive) with its Mayor and City Clerk
        posts, the City Council with thirteen ward posts, the mayor as a
        ``Person``, the parks board, the school board, and finally one
        committee ``Organization`` per entry returned by the LIMS
        ``GetCommittees`` endpoint (the 'City Council' entry is skipped
        because the council itself was already yielded).
        """
        city = Organization('City of Minneapolis', classification='executive')
        city.add_post('Mayor', 'Mayor', division_id='ocd-division/country:us/state:mn/place:minneapolis')
        city.add_post('City Clerk', 'City Clerk', division_id='ocd-division/country:us/state:mn/place:minneapolis')
        yield city

        council = Organization(name="Minneapolis City Council", classification="legislature", parent_id=city)
        for x in range(1, 14):
            council.add_post(
                "Ward {}".format(x),
                "Councilmember",
                division_id='ocd-division/country:us/state:mn/place:minneapolis/ward:{}'.format(x))

        yield council

        frey = Person(name="Frey, Jacob")
        frey.add_term('Mayor',
                      'executive',
                      start_date=datetime.date(2018, 1, 19),
                      appointment=True)
        # NOTE(review): placeholder source URL - replace with a real source.
        frey.add_source('http://www.google.com')
        yield frey

        parks = Organization('Minneapolis Parks and Recreation', classification='legislature')
        for x in range(1, 7):
            parks.add_post(
                "District {}".format(x),
                "Commissioner")

        parks.add_post("At Large", "Commissioner")

        yield parks

        school = Organization('Minneapolis School Board', classification='legislature')
        for x in range(1, 7):
            school.add_post(
                "District {}".format(x),
                "Director",)

        school.add_post("At Large", "Director")
        yield school

        cmt_link = 'https://lims.minneapolismn.gov/Calendar/GetCommittees'
        cmts_site = requests.get(cmt_link)
        cmts = cmts_site.json()
        for c in cmts:
            # Only the name and start date are used; the other fields of the
            # committee record are deliberately not read.
            name = c['Name']
            start_date = c['StartDate']

            if name != 'City Council':
                org = Organization(name, classification='committee', parent_id=council)
                org.add_source(cmt_link)
                if start_date is not None:
                    # Keep only the part before 'T' of the timestamp string.
                    org.founding_date = start_date.split('T')[0]
                yield org
Пример #21
0
    def scrape(self):
        """Yield New York City Council committees and people.

        Member details scraped from the Legistar web member list are merged
        with City Council office records from the API. Every person gets one
        term per office record plus party, photo, contact details, links and
        notes when the web record provides them. Bodies whose type is one of
        the committee types (and two named units) are yielded as committee
        ``Organization``s with their office records attached to people as
        memberships. People are yielded last.

        Raises:
            AssertionError: if the names found on the web and in the API do
                not line up exactly.
        """
        web_scraper = LegistarPersonScraper(requests_per_minute = self.requests_per_minute)
        web_scraper.MEMBERLIST = 'http://legistar.council.nyc.gov/DepartmentDetail.aspx?ID=6897&GUID=CDC6E691-8A8C-4F25-97CB-86F31EDAB081&Mode=MainBody'

        if self.cache_storage:
            web_scraper.cache_storage = self.cache_storage

        if self.requests_per_minute == 0:
            web_scraper.cache_write_only = False

        web_info = {}

        for member, _ in web_scraper.councilMembers():
            name = member['Person Name']['label'].strip()
            web_info[name] = member

        city_council, = [body for body in self.bodies()
                         if body['BodyName'] == 'City Council']

        terms = collections.defaultdict(list)

        public_advocates = {  # Match casing to Bill De Blasio as council member
            'The Public Advocate (Mr. de Blasio)': 'Bill De Blasio',
            'The Public Advocate (Ms. James)': 'Letitia James',
        }

        for office in self.body_offices(city_council):
            name = office['OfficeRecordFullName']
            name = public_advocates.get(name, name).strip()

            terms[name].append(office)

            # Add past members (and public advocates), who have no web
            # record: every field then reads as None.
            if name not in web_info:
                web_info[name] = collections.defaultdict(lambda: None)

        # Check that we have everyone we expect, formatted consistently, in
        # both information arrays. For instance, this will fail if we forget to
        # strip trailing spaces from names on one side or the other (which has
        # the effect of omitting information, such as post, from the scrape).

        assert set(web_info.keys()) == set(terms.keys())

        # Web field name -> (contact detail type, note). Loop-invariant, so
        # built once rather than for every term.
        contact_types = {
            "City Hall Office": ("address", "City Hall Office"),
            "City Hall Phone": ("voice", "City Hall Phone"),
            "Ward Office Phone": ("voice", "Ward Office Phone"),
            "Ward Office Address": ("address", "Ward Office Address"),
            "Fax": ("fax", "Fax")
        }

        members = {}

        for member, offices in terms.items():

            p = Person(member)

            web = web_info[member]

            for term in offices:
                role = term['OfficeRecordTitle']

                if role == 'Public Advocate':
                    role = 'Non-Voting Council Member'
                else:
                    role = 'Council Member'

                district = web.get('District', '').replace(' 0', ' ')

                p.add_term(role,
                           'legislature',
                           district=district,
                           start_date=self.toDate(term['OfficeRecordStartDate']),
                           end_date=self.toDate(term['OfficeRecordEndDate']))

                party = web.get('Political Party')

                if party == 'Democrat':
                    party = 'Democratic'

                if party:
                    p.add_party(party)

                if web.get('Photo'):
                    p.image = web['Photo']

                for contact_type, (type_, _note) in contact_types.items():
                    # Read the value once; the previous code *called* the
                    # mapping here, which raises TypeError as soon as a
                    # contact field is actually present.
                    contact_value = web.get(contact_type)
                    if contact_value and contact_value != 'N/A':
                        p.add_contact_detail(type=type_,
                                             value=contact_value,
                                             note=_note)

                if web.get('E-mail'):
                    p.add_contact_detail(type="email",
                                         value=web['E-mail']['url'],
                                         note='E-mail')

                if web.get('Web site'):
                    p.add_link(web['Web site']['url'], note='web site')

                if web.get('Notes'):
                    p.extras = {'Notes': web['Notes']}

                if not p.sources:  # Only add sources once
                    source_urls = self.person_sources_from_office(term)
                    person_api_url, person_web_url = source_urls
                    p.add_source(person_api_url, note='api')
                    p.add_source(person_web_url, note='web')

            members[member] = p

        committee_types = ['Committee',
                           'Inactive Committee',
                           'Select Committee',
                           'Subcommittee',
                           'Task Force',
                           'Land Use', # Committee on Land Use
                          ]

        body_types = {k: v for k, v in self.body_types().items()
                      if k in committee_types}

        for body in self.bodies():
            if body['BodyTypeName'] in body_types \
                or body['BodyName'] in ('Legislative Documents Unit',
                                        'Legal and Government Affairs Division'):

                # Skip typo in API data
                if body['BodyName'] == 'Committee on Mental Health, Developmental Disability, Alcoholism, Substance Abuse amd Disability Services':
                    continue

                # Bodies without an explicit parent hang off the council.
                parent_org = PARENT_ORGS.get(body['BodyName'], 'New York City Council')

                body_name = body['BodyName']

                o = Organization(body_name,
                                 classification='committee',
                                 parent_id={'name': parent_org})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
                o.add_source(self.WEB_URL + '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body), note='web')

                for office in self.body_offices(body):
                    # Possible roles: 'Council Member', 'MEMBER', 'Ex-Officio',
                    # 'Committee Member', None, 'CHAIRPERSON'

                    role = office['OfficeRecordTitle']

                    if role and role.lower() == 'chairperson':
                        role = 'Chairperson'
                    else:
                        role = 'Member'

                    person = office['OfficeRecordFullName']
                    person = public_advocates.get(person, person).strip()

                    if person in members:
                        p = members[person]
                    else:
                        # Committee member with no City Council office record.
                        p = Person(person)

                        source_urls = self.person_sources_from_office(office)
                        person_api_url, person_web_url = source_urls
                        p.add_source(person_api_url, note='api')
                        p.add_source(person_web_url, note='web')

                        members[person] = p

                    p.add_membership(o,
                                     role=role,
                                     start_date=self.toDate(office['OfficeRecordStartDate']),
                                     end_date=self.toDate(office['OfficeRecordEndDate']))

                yield o

        for p in members.values():
            yield p
Пример #22
0
    def get_organizations(self):
        """Yield Chicago's council, its city government, and named officials.

        The council (with fifty ward posts) comes first, then the city
        executive (Mayor and City Clerk posts), then one ``Person`` per
        hard-coded mayor or city clerk, each carrying a single appointed
        term in the executive.
        """
        people_page = 'https://chicago.legistar.com/People.aspx'
        city_division = 'ocd-division/country:us/state:il/place:chicago'

        council = Organization(name="Chicago City Council", classification="legislature")
        for ward in range(1, 51):
            council.add_post(
                "Ward {}".format(ward),
                "Alderman",
                division_id='ocd-division/country:us/state:il/place:chicago/ward:{}'.format(ward))
        yield council

        executive = Organization('City of Chicago', classification='executive')
        for office in ('Mayor', 'City Clerk'):
            # Post label and role are the same string for both offices.
            executive.add_post(office, office, division_id=city_division)
        yield executive

        # (name, role, term start, term end); an end of None means no end
        # date is passed to add_term.
        officials = (
            ("Daley, Richard M.", 'Mayor',
             datetime.date(1989, 4, 24), datetime.date(2011, 5, 16)),
            ("Emanuel, Rahm", 'Mayor',
             datetime.date(2011, 5, 16), None),
            ('Mendoza, Susana A.', 'City Clerk',
             datetime.date(2011, 5, 16), datetime.date(2016, 12, 4)),
            ('Del Valle, Miguel', 'City Clerk',
             datetime.date(2006, 12, 1), datetime.date(2011, 5, 16)),
            ('Valencia, Anna M.', 'City Clerk',
             datetime.date(2017, 1, 25), datetime.date(2019, 5, 20)),
        )

        for full_name, role, began, ended in officials:
            term_kwargs = {'start_date': began, 'appointment': True}
            if ended is not None:
                term_kwargs['end_date'] = ended

            official = Person(name=full_name)
            official.add_term(role, 'executive', **term_kwargs)
            official.add_source(people_page)
            yield official
Пример #23
0
    def scrape(self):
        """Yield Chicago City Council committees and people.

        City Council office records (vacancies excluded) are grouped per
        person and combined with the member list scraped from the Legistar
        web site, which supplies ward, photo, contact details, e-mail and
        website. Bodies typed 'Committee' are yielded with their office
        records attached to people as memberships; bodies typed
        'Joint Committee' are yielded without memberships. People are
        yielded last.

        Raises:
            KeyError: if a person with a council office record has no entry
                in the scraped web member list.
        """
        body_types = self.body_types()

        city_council, = [
            body for body in self.bodies()
            if body['BodyName'] == 'City Council'
        ]

        terms = collections.defaultdict(list)
        for office in self.body_offices(city_council):
            if 'VACAN' not in office['OfficeRecordFullName']:
                terms[office['OfficeRecordFullName'].strip()].append(office)

        web_scraper = LegistarPersonScraper(None, None)
        web_scraper.MEMBERLIST = 'https://chicago.legistar.com/DepartmentDetail.aspx?ID=12357&GUID=4B24D5A9-FED0-4015-9154-6BFFFB2A8CB4&R=8bcbe788-98cd-4040-9086-b34fa8e49881'
        web_scraper.ALL_MEMBERS = '3:3'

        web_info = {}
        for member, _ in web_scraper.councilMembers(
            {'ctl00$ContentPlaceHolder$lstName': 'City Council'}):
            # Strip the label: `terms` is keyed by stripped names, so a label
            # with stray whitespace could never be found below.
            web_info[member['Person Name']['label'].strip()] = member

        # Hard-coded records for two people; every field other than the
        # ward reads as None.
        web_info['Balcer, James'] = collections.defaultdict(lambda: None)
        web_info['Fioretti, Bob'] = collections.defaultdict(lambda: None)
        web_info['Balcer, James']['Ward/Office'] = 11
        web_info['Fioretti, Bob']['Ward/Office'] = 2

        members = {}
        for member, offices in terms.items():
            web = web_info[member]
            p = Person(member)
            for term in offices:
                p.add_term('Alderman',
                           'legislature',
                           district="Ward {}".format(int(web['Ward/Office'])),
                           start_date=self.toDate(
                               term['OfficeRecordStartDate']),
                           end_date=self.toDate(term['OfficeRecordEndDate']))

            if web.get('Photo'):
                p.image = web['Photo']

            # Web field name -> (contact detail type, note).
            contact_types = {
                "City Hall Address": ("address", "City Hall Address"),
                "City Hall Phone": ("voice", "City Hall Phone"),
                "Ward Office Phone": ("voice", "Ward Office Phone"),
                "Ward Office Address": ("address", "Ward Office Address"),
                "Fax": ("fax", "Fax")
            }

            for contact_type, (type_, _note) in contact_types.items():
                if web[contact_type] and web[contact_type] != 'N/A':
                    p.add_contact_detail(type=type_,
                                         value=web[contact_type],
                                         note=_note)

            if web["E-mail"] and web["E-mail"][
                    "label"] and web["E-mail"]["label"] != 'N/A':
                p.add_contact_detail(type="email",
                                     value=web['E-mail']['label'],
                                     note='E-mail')

            if web['Website']:
                p.add_link(web['Website']['url'])

            # `term` is the last office record of this person; `offices` is
            # never empty because entries only exist once a record was
            # appended.
            source_urls = self.person_sources_from_office(term)
            person_api_url, person_web_url = source_urls
            p.add_source(person_api_url, note='api')
            p.add_source(person_web_url, note='web')

            members[member] = p

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Committee']:
                o = Organization(body['BodyName'],
                                 classification='committee',
                                 parent_id={'name': 'Chicago City Council'})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body),
                             note='api')
                o.add_source(
                    self.WEB_URL +
                    '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.
                    format(**body),
                    note='web')

                for office in self.body_offices(body):
                    # messed up record for joanna thompson
                    if office['OfficeRecordId'] == 1055:
                        continue

                    # Only the two leadership titles are kept verbatim.
                    role = office['OfficeRecordTitle']
                    if role not in ("Vice Chair", "Chairman"):
                        role = 'Member'

                    person = office['OfficeRecordFullName'].strip()
                    if person in members:
                        p = members[person]
                    else:
                        # Committee member with no City Council office record.
                        p = Person(person)

                        source_urls = self.person_sources_from_office(office)
                        person_api_url, person_web_url = source_urls
                        p.add_source(person_api_url, note='api')
                        p.add_source(person_web_url, note='web')

                        members[person] = p

                    p.add_membership(body['BodyName'],
                                     role=role,
                                     start_date=self.toDate(
                                         office['OfficeRecordStartDate']),
                                     end_date=self.toDate(
                                         office['OfficeRecordEndDate']))

                yield o

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Joint Committee']:
                o = Organization(body['BodyName'],
                                 classification='committee',
                                 parent_id={'name': 'Chicago City Council'})

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body),
                             note='api')
                o.add_source(
                    self.WEB_URL +
                    '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.
                    format(**body),
                    note='web')

                yield o

        for p in members.values():
            yield p
Пример #24
0
    def scrape(self):
        """Yield Pittsburgh City Council committees and people.

        City Council office records (vacancies excluded) are grouped per
        person; each person gets a 'Councilmember' term per record, an
        e-mail from the scraped web member list when available, and address,
        phone and website contact details from the person API response.
        Bodies typed 'Committee' are yielded with their office records
        attached to people as memberships. People are yielded last,
        including committee members who hold no council office.
        """
        body_types = self.body_types()
        city_council, = [body for body in self.bodies()
                         if body["BodyName"] == "City Council"]
        terms = collections.defaultdict(list)

        for office in self.body_offices(city_council):
            if "VACAN" not in office["OfficeRecordFullName"]:
                terms[office["OfficeRecordFullName"].strip()].append(office)

        web_scraper = LegistarPersonScraper(requests_per_minute=self.requests_per_minute)
        web_scraper.MEMBERLIST = "https://pittsburgh.legistar.com/People.aspx"
        web_scraper.COMMITTEELIST = "https://pittsburgh.legistar.com/Departments.aspx"

        if self.cache_storage:
            web_scraper.cache_storage = self.cache_storage

        if self.requests_per_minute == 0:
            web_scraper.cache_write_only = False

        web_info = {}
        # NOTE(review): this assumes councilMembers() yields one mapping per
        # member whose "Person Name" value is hashable - confirm against the
        # LegistarPersonScraper version in use.
        for member in web_scraper.councilMembers():
            web_info[member["Person Name"]] = member

        members = {}
        for member, offices in terms.items():
            person = Person(member)
            for term in offices:
                person.add_term("Councilmember",
                                "legislature",
                                start_date=self.toDate(term["OfficeRecordStartDate"]),
                                end_date=self.toDate(term["OfficeRecordEndDate"]))

            if member in web_info:
                web = web_info[member]
                if web["E-mail"] and web["E-mail"]["label"] and web["E-mail"]["label"] != "N/A":
                    person.add_contact_detail(type="email",
                                              value=web["E-mail"]["label"],
                                              note="E-mail")

            # `term` is the last office record of this person. Here
            # person_sources_from_office returns the API url together with
            # the person record fetched from it.
            person_source_data = self.person_sources_from_office(term)
            person_api_url, person_api_response = person_source_data
            person.add_source(person_api_url, note="api")

            if person_api_response["PersonAddress1"]:
                address = (person_api_response["PersonAddress1"] + ", " + person_api_response["PersonCity1"]
                           + ", " + person_api_response["PersonState1"] + " " + person_api_response["PersonZip1"])
                person.add_contact_detail(type="address",
                                          value=address,
                                          note="Office address")

            if person_api_response["PersonPhone"]:
                person.add_contact_detail(type="voice",
                                          value=person_api_response["PersonPhone"],
                                          note="Office phone")

            if person_api_response["PersonWWW"]:
                person.add_contact_detail(type="url",
                                          value=person_api_response["PersonWWW"],
                                          note="District website")

            members[member] = person

        for body in self.bodies():
            if body["BodyTypeId"] == body_types["Committee"]:
                body_name_clean = body["BodyName"].strip()
                organization = Organization(body_name_clean,
                                            classification="committee",
                                            parent_id={"name": "Pittsburgh City Council"})

                organization.add_source(self.BASE_URL + "/bodies/{BodyId}".format(**body), note="api")

                for office in self.body_offices(body):
                    # Anything other than the two leadership types, plain
                    # "Councilmember" included, is recorded as "Member".
                    role = office["OfficeRecordMemberType"]
                    if role not in ("Vice Chair", "Chair"):
                        role = "Member"

                    person_name = office["OfficeRecordFullName"].strip()
                    if person_name in members:
                        person = members[person_name]
                    else:
                        # Committee member with no council office record.
                        # Register the new person so the membership added
                        # below is yielded with them; previously the object
                        # was discarded and the membership silently lost.
                        person = Person(person_name)
                        person_api_url, _ = self.person_sources_from_office(office)
                        person.add_source(person_api_url, note="api")
                        members[person_name] = person

                    person.add_membership(body_name_clean,
                                          role=role,
                                          start_date=self.toDate(office["OfficeRecordStartDate"]),
                                          end_date=self.toDate(office["OfficeRecordEndDate"]))

                yield organization

        for person in members.values():
            yield person
Пример #25
0
    def scrape(self):
        """Yield the board's committees, then every person.

        Office records of the board body are grouped by full name. Each
        record with a title other than 'Board Member' / 'non-voting member'
        adds an appointed term under that title; every record except
        'Chief Executive Officer' also adds a (non)voting board member term
        with the post looked up by name. Bodies typed 'Committee' are
        yielded with their office records attached to people as memberships.
        """
        body_types = self.body_types()

        matches = [
            candidate for candidate in self.bodies()
            if candidate['BodyName'] == 'Board of Directors - Regular Board Meeting'
        ]
        # Exactly one body is expected; anything else raises ValueError.
        (board_of_directors,) = matches
        board_of_directors["BodyName"] = "Board of Directors"

        terms = collections.defaultdict(list)
        for record in self.body_offices(board_of_directors):
            terms[record['OfficeRecordFullName']].append(record)

        members = {}
        for member, offices in terms.items():
            p = Person(member)

            for term in offices:
                role = term['OfficeRecordTitle']

                if role == 'non-voting member':
                    member_type = 'Nonvoting Board Member'
                    post = NONVOTING_POSTS.get(member)
                elif role == 'Board Member':
                    member_type = 'Board Member'
                    post = VOTING_POSTS.get(member)
                else:
                    # Any other title is recorded as its own appointed term.
                    p.add_term(
                        role,
                        'legislature',
                        start_date=self.toDate(term['OfficeRecordStartDate']),
                        end_date=self.toDate(term['OfficeRecordEndDate']),
                        appointment=True)

                    # The chief executive gets no board member term.
                    if role == 'Chief Executive Officer':
                        continue

                    member_type = 'Board Member'
                    post = VOTING_POSTS.get(member)

                p.add_term(
                    member_type,
                    'legislature',
                    district=post,
                    start_date=self.toDate(term['OfficeRecordStartDate']),
                    end_date=self.toDate(term['OfficeRecordEndDate']))

            # Sources come from the last office record seen for this person.
            person_api_url, person_web_url = self.person_sources_from_office(term)
            p.add_source(person_api_url, note='api')
            p.add_source(person_web_url, note='web')

            members[member] = p

        for body in self.bodies():
            if body['BodyTypeId'] != body_types['Committee']:
                continue

            org_name = body['BodyName'].strip()
            o = Organization(org_name,
                             classification='committee',
                             parent_id={'name': 'Board of Directors'})

            api_source = self.BASE_URL + '/bodies/{BodyId}'.format(**body)
            o.add_source(api_source, note='api')

            web_source = (self.WEB_URL +
                          '/DepartmentDetail.aspx?ID={BodyId}&GUID={BodyGuid}'.format(**body))
            o.add_source(web_source, note='web')

            for office in self.body_offices(body):
                role = office['OfficeRecordTitle']

                # Leadership titles are kept; everything else collapses to
                # a voting or nonvoting member.
                if role not in ("Chair", "Vice Chair"):
                    role = 'Nonvoting Member' if role == 'non-voting member' else 'Member'

                person = office['OfficeRecordFullName']

                p = members.get(person)
                if p is None:
                    # Committee member with no board office record.
                    p = Person(person)

                    person_api_url, person_web_url = self.person_sources_from_office(office)
                    p.add_source(person_api_url, note='api')
                    p.add_source(person_web_url, note='web')

                    members[person] = p

                p.add_membership(org_name,
                                 role=role,
                                 start_date=self.toDate(
                                     office['OfficeRecordStartDate']),
                                 end_date=self.toDate(
                                     office['OfficeRecordEndDate']))

            yield o

        for scraped_person in members.values():
            yield scraped_person
Пример #26
0
    def scrape(self):
        '''
        Scrape the web to create a dict with all active organizations.
        Then, we can access the correct URL for the organization detail page.
        '''
        web_scraper = LegistarPersonScraper(requests_per_minute=self.requests_per_minute)
        web_scraper.MEMBERLIST = 'https://metro.legistar.com/People.aspx'
        web_info = {}

        for _, organizations in web_scraper.councilMembers():
            for organization, _, _ in organizations:
                organization_name = organization['Department Name']['label'].strip()
                organization_info = organization['Department Name']

                web_info[organization_name] = organization_info

        body_types = self.body_types()

        board_of_directors, = [body for body in self.bodies()
                               if body['BodyName'] == 'Board of Directors - Regular Board Meeting']
        board_of_directors["BodyName"] = "Board of Directors"

        terms = collections.defaultdict(list)
        for office in self.body_offices(board_of_directors):
            terms[office['OfficeRecordFullName']].append(office)

        members = {}
        for member, offices in terms.items():
            p = Person(member)
            for term in offices:
                role = term['OfficeRecordTitle']

                if role not in {'Board Member', 'non-voting member'}:
                    p.add_term(role,
                               'legislature',
                               start_date = self.toDate(term['OfficeRecordStartDate']),
                               end_date = self.toDate(term['OfficeRecordEndDate']),
                               appointment = True)
                if role != 'Chief Executive Officer':
                    if role == 'non-voting member':
                        member_type = 'Nonvoting Board Member'
                        post = NONVOTING_POSTS.get(member)
                    else:
                        member_type = 'Board Member'
                        post = VOTING_POSTS.get(member)

                    start_date = self.toDate(term['OfficeRecordStartDate'])
                    end_date = self.toDate(term['OfficeRecordEndDate'])
                    board_membership = p.add_term(member_type,
                               'legislature',
                               district = post,
                               start_date = start_date,
                               end_date = end_date)

                    acting_member_end_date = ACTING_MEMBERS_WITH_END_DATE.get(p.name)
                    if acting_member_end_date and acting_member_end_date <= end_date:
                        board_membership.extras = {'acting': 'true'}

            source_urls = self.person_sources_from_office(term)
            person_api_url, person_web_url = source_urls
            p.add_source(person_api_url, note='api')
            p.add_source(person_web_url, note='web')

            members[member] = p

        for body in self.bodies():
            if body['BodyTypeId'] == body_types['Committee']:
                organization_name = body['BodyName'].strip()
                o = Organization(organization_name,
                                 classification='committee',
                                 parent_id={'name' : 'Board of Directors'})

                organization_info = web_info.get(organization_name, {})
                organization_url = organization_info.get('url', self.WEB_URL + 'https://metro.legistar.com/Departments.aspx')

                o.add_source(self.BASE_URL + '/bodies/{BodyId}'.format(**body), note='api')
                o.add_source(organization_url, note='web')

                for office in self.body_offices(body):
                    role = office['OfficeRecordTitle']


                    if role not in ("Chair", "Vice Chair", "Chief Executive Officer"):
                        if role == 'non-voting member':
                            role = 'Nonvoting Member'
                        else:
                            role = 'Member'

                    person = office['OfficeRecordFullName']

                    if person in members:
                        p = members[person]
                    else:
                        p = Person(person)

                        source_urls = self.person_sources_from_office(office)
                        person_api_url, person_web_url = source_urls
                        p.add_source(person_api_url, note='api')
                        p.add_source(person_web_url, note='web')

                        members[person] = p

                    start_date = self.toDate(office['OfficeRecordStartDate'])
                    end_date = self.toDate(office['OfficeRecordEndDate'])
                    membership = p.add_membership(organization_name,
                                     role=role,
                                     start_date=start_date,
                                     end_date=end_date)

                    acting_member_end_date = ACTING_MEMBERS_WITH_END_DATE.get(p.name)
                    if acting_member_end_date and acting_member_end_date <= end_date:
                        membership.extras = {'acting': 'true'}

                yield o

        for p in members.values():
            yield p
Пример #27
0
    def scrape(self):
        """Yield New York City Council members, then their committees.

        Rows produced by ``self.councilMembers`` are grouped by the URL found
        under ``'Person Name'``; rows without a URL are ignored.  The committee
        list kept for a person is the one that accompanied the first row seen
        for that URL.  Personal details (party, photo, e-mail, web site, notes)
        are taken from the last row collected for the person.

        Each row contributes a (start, end, district) span.  Spans for the same
        district whose dates are exactly one day apart are merged, and every
        merged span becomes one 'Council Member' term.  An end date of
        2017-12-31 is recorded as an empty end date.

        Yields:
            Every ``Person``, then the collected ``Organization`` objects whose
            name contains 'Committee', then those whose name contains
            'Subcommittee', and finally two hard-coded organizations.
        """
        noncommittees = {'Committee of the Whole'}
        committee_d = {}
        people_d = {}

        # Go to memberlist
        extra_args = {'ctl00$ContentPlaceHolder$lstName': 'City Council'}

        for councilman, committees in self.councilMembers(
                extra_args=extra_args):
            if 'url' not in councilman['Person Name']:
                continue

            councilman_url = councilman['Person Name']['url']
            if councilman_url in people_d:
                people_d[councilman_url][0].append(councilman)
            else:
                people_d[councilman_url] = [councilman], committees

        for person_entries, committees in people_d.values():
            councilman = person_entries[-1]

            p = Person(councilman['Person Name']['label'])

            if p.name == 'Letitia James':
                p.name = 'Letitia Ms. James'
                p.add_name('Letitia James')

            spans = [(self.toTime(entry['Start Date']).date(),
                      self.toTime(entry['End Date']).date(),
                      entry['District'])
                     for entry in person_entries]

            # Fold back-to-back spans for the same district into one span.
            merged_spans = []
            for start_date, end_date, district in sorted(spans):
                previous = merged_spans[-1] if merged_spans else None
                if (previous is not None
                        and start_date - previous[1] == datetime.timedelta(1)
                        and district == previous[2]):
                    previous[1] = end_date
                else:
                    merged_spans.append([start_date, end_date, district])

            for start_date, end_date, district in merged_spans:
                # Use the district recorded for this span rather than the
                # district of the person's last row, so a member who changed
                # districts keeps the right district on each term.
                # NOTE(review): assumes every row carries a string 'District'
                # like the last row does -- confirm against the scraped table.
                district = district.replace(' 0', ' ')
                if end_date == datetime.date(2017, 12, 31):
                    end_date = ''
                else:
                    end_date = end_date.isoformat()
                p.add_term('Council Member',
                           'legislature',
                           district=district,
                           start_date=start_date.isoformat(),
                           end_date=end_date)

            party = councilman['Political Party']
            if party == 'Democrat':
                party = 'Democratic'

            if party:
                p.add_party(party)

            if councilman['Photo']:
                p.image = councilman['Photo']

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['url'],
                                     note='E-mail')

            if councilman['Web site']:
                p.add_link(councilman['Web site']['url'], note='web site')

            p.extras = {'Notes': councilman['Notes']}

            p.add_source(councilman['Person Name']['url'], note='web')

            for committee, _, _ in committees:
                committee_name = committee['Department Name']['label']
                if (committee_name in noncommittees
                        or 'committee' not in committee_name.lower()):
                    continue

                o = committee_d.get(committee_name)
                if o is None:
                    parent_id = PARENT_ORGS.get(committee_name,
                                                'New York City Council')
                    o = Organization(committee_name,
                                     classification='committee',
                                     parent_id={'name': parent_id})
                    o.add_source(committee['Department Name']['url'])
                    committee_d[committee_name] = o

                membership = o.add_member(p, role=committee["Title"])
                membership.start_date = self.mdY2Ymd(committee["Start Date"])

            yield p

        # Organizations named '...Committee...' are emitted before those
        # named 'Subcommittee...'.
        for o in committee_d.values():
            if 'Committee' in o.name:
                yield o

        for o in committee_d.values():
            if 'Subcommittee' in o.name:
                yield o

        # Two organizations that are always emitted, independent of the
        # scraped membership data.
        o = Organization(
            'Committee on Mental Health, Developmental Disability, Alcoholism, Drug Abuse and Disability Services',
            classification='committee',
            parent_id={'name': 'New York City Council'})
        o.add_source("http://legistar.council.nyc.gov/Departments.aspx")

        yield o

        o = Organization(
            'Subcommittee on Drug Abuse',
            classification='committee',
            parent_id={
                'name':
                'Committee on Mental Health, Developmental Disability, Alcoholism, Drug Abuse and Disability Services'
            })
        o.add_source("http://legistar.council.nyc.gov/Departments.aspx")

        yield o
Пример #28
0
    def scrape(self):
        """Yield Chicago aldermen and City Council committees.

        For every row from ``self.councilMembers`` that carries a numeric
        'Ward/Office' value, an 'Alderman' ``Person`` is built with photo,
        contact details, link and source, and is added as a member of each
        legislative body in the row's committee list that is not in
        ``non_committees``.

        Rows whose 'Ward/Office' is blank, "Mayor" or "Clerk" are skipped.  No
        ``Person`` is constructed for the Mayor or Clerk, so processing them
        would attach their details to the previously built alderman (and
        yield that alderman a second time), or fail if such a row came first.

        Yields:
            Current aldermen, then one ``Person`` per ``FORMER_ALDERMEN``
            entry, then the committees collected from memberships, then one
            ``Organization`` per name in ``FORMER_COMMITTEES`` and
            ``JOINT_COMMITTEES``.
        """
        committee_d = {}
        non_committees = {
            'City Council', 'Office of the Mayor', 'Office of the City Clerk'
        }
        # Row key -> (contact detail type, note).  Loop-invariant, so built
        # once instead of once per council member.
        contact_types = {
            "City Hall Office": ("address", "City Hall Office"),
            "City Hall Phone": ("voice", "City Hall Phone"),
            "Ward Office Phone": ("voice", "Ward Office Phone"),
            "Ward Office Address": ("address", "Ward Office Address"),
            "Fax": ("fax", "Fax")
        }

        for councilman, committees in self.councilMembers():
            ward = councilman['Ward/Office']
            if ward == "" or ward in {"Mayor", "Clerk"}:
                # Not an alderman row: there is no Person to attach data to.
                continue

            p = Person(councilman['Person Name']['label'],
                       district="Ward {}".format(int(ward)),
                       primary_org="legislature",
                       role="Alderman")

            if councilman['Photo']:
                p.image = councilman['Photo']

            for contact_type, (type_, _note) in contact_types.items():
                if councilman[contact_type]:
                    p.add_contact_detail(type=type_,
                                         value=councilman[contact_type],
                                         note=_note)

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['label'],
                                     note='E-mail')

            if councilman['Website']:
                p.add_link(councilman['Website']['url'])
            p.add_source(councilman['Person Name']['url'], note='web')

            for committee, _, _ in committees:
                committee_name = committee['Legislative Body']['label']
                if not committee_name or committee_name in non_committees:
                    continue

                o = committee_d.get(committee_name)
                if o is None:
                    o = Organization(
                        committee_name,
                        classification='committee',
                        parent_id={'name': 'Chicago City Council'})
                    o.add_source(committee['Legislative Body']['url'],
                                 note='web')
                    committee_d[committee_name] = o

                o.add_member(p, role=committee["Title"])

            yield p

        for name, term in FORMER_ALDERMEN.items():
            p = Person(name=name,
                       primary_org="legislature",
                       start_date=term['term'][0],
                       end_date=term['term'][1],
                       district="Ward {}".format(term['ward']),
                       role='Alderman')
            if name == 'Chandler, Michael D.':
                # This former alderman gets a second, hard-coded term in
                # addition to the one listed in FORMER_ALDERMEN.
                p.add_term('Alderman',
                           "legislature",
                           district="Ward {}".format(term['ward']),
                           start_date=datetime.date(2011, 5, 16),
                           end_date=datetime.date(2015, 5, 18))

            p.add_source(term['source'], note='web')
            yield p

        for o in committee_d.values():
            yield o

        for committee_name in FORMER_COMMITTEES:
            o = Organization(committee_name,
                             classification='committee',
                             parent_id={'name': 'Chicago City Council'})
            o.add_source("https://chicago.legistar.com/Departments.aspx",
                         note='web')
            yield o

        for joint_committee in JOINT_COMMITTEES:
            o = Organization(joint_committee,
                             classification='committee',
                             parent_id={'name': 'Chicago City Council'})
            o.add_source("https://chicago.legistar.com/Departments.aspx",
                         note='web')
            yield o
Пример #29
0
    def scrape(self):
        """Scrape Chicago aldermen and the committees they belong to.

        Each (councilman, committees) pair from ``self.councilMembers`` with
        a numeric 'Ward/Office' becomes an 'Alderman' ``Person``; the bodies
        listed in ``committees`` (minus ``non_committees`` and unnamed ones)
        become 'committee' organizations with the person as a member.

        Rows with an empty 'Ward/Office', and the "Mayor" and "Clerk" rows,
        are ignored.  The Mayor and Clerk never get a ``Person`` of their own
        here, so letting them fall through would reuse the ``Person`` left
        over from the previous row (mixing in their contact details and
        yielding it again) or raise ``NameError`` on a first row.

        Yields:
            Aldermen from the scrape, people built from ``FORMER_ALDERMEN``,
            the scraped committees, and finally the organizations named in
            ``FORMER_COMMITTEES`` and ``JOINT_COMMITTEES``.
        """
        committee_d = {}
        non_committees = {'City Council', 'Office of the Mayor',
                          'Office of the City Clerk'}
        # 'Ward/Office' values for which no alderman is created.
        skipped_offices = {"", "Mayor", "Clerk"}
        # Maps a row key to the (type, note) used for its contact detail;
        # identical for every row, so defined outside the loop.
        contact_types = {
            "City Hall Office": ("address", "City Hall Office"),
            "City Hall Phone": ("voice", "City Hall Phone"),
            "Ward Office Phone": ("voice", "Ward Office Phone"),
            "Ward Office Address": ("address", "Ward Office Address"),
            "Fax": ("fax", "Fax")
        }

        for councilman, committees in self.councilMembers():
            office = councilman['Ward/Office']
            if office in skipped_offices:
                continue

            ward = "Ward {}".format(int(office))
            role = "Alderman"
            p = Person(councilman['Person Name']['label'],
                       district=ward,
                       primary_org="legislature",
                       role=role)

            if councilman['Photo']:
                p.image = councilman['Photo']

            for contact_type, (type_, _note) in contact_types.items():
                if councilman[contact_type]:
                    p.add_contact_detail(type=type_,
                                         value=councilman[contact_type],
                                         note=_note)

            if councilman["E-mail"]:
                p.add_contact_detail(type="email",
                                     value=councilman['E-mail']['label'],
                                     note='E-mail')

            if councilman['Website']:
                p.add_link(councilman['Website']['url'])
            p.add_source(councilman['Person Name']['url'], note='web')

            for committee, _, _ in committees:
                committee_name = committee['Legislative Body']['label']
                if committee_name and committee_name not in non_committees:
                    o = committee_d.get(committee_name, None)
                    if o is None:
                        o = Organization(committee_name,
                                         classification='committee',
                                         parent_id={'name': 'Chicago City Council'})
                        o.add_source(committee['Legislative Body']['url'],
                                     note='web')
                        committee_d[committee_name] = o

                    o.add_member(p, role=committee["Title"])

            yield p

        for name, term in FORMER_ALDERMEN.items():
            p = Person(name=name,
                       primary_org="legislature",
                       start_date=term['term'][0],
                       end_date=term['term'][1],
                       district="Ward {}".format(term['ward']),
                       role='Alderman')
            if name == 'Chandler, Michael D.':
                # An extra hard-coded term is added for this one name on top
                # of the term taken from FORMER_ALDERMEN.
                p.add_term('Alderman',
                           "legislature",
                           district="Ward {}".format(term['ward']),
                           start_date=datetime.date(2011, 5, 16),
                           end_date=datetime.date(2015, 5, 18))

            p.add_source(term['source'], note='web')
            yield p

        for o in committee_d.values():
            yield o

        for committee_name in FORMER_COMMITTEES:
            o = Organization(committee_name,
                             classification='committee',
                             parent_id={'name': 'Chicago City Council'})
            o.add_source("https://chicago.legistar.com/Departments.aspx",
                         note='web')
            yield o

        for joint_committee in JOINT_COMMITTEES:
            o = Organization(joint_committee,
                             classification='committee',
                             parent_id={'name': 'Chicago City Council'})
            o.add_source("https://chicago.legistar.com/Departments.aspx",
                         note='web')
            yield o
Пример #30
0
    def get_organizations(self):
        """Yield Pittsburgh's organizations and a few hand-entered people.

        Yields, in order: the City Council (with posts for districts 1-9),
        the Mayor's office, the "Standing Committee", a fixed list of
        committee organizations sourced from ``self.url``, the City Clerk
        department, and three ``Person`` records (two City Clerks and the
        "All Members" placeholder).
        """
        org = Organization(name="Pittsburgh City Council",
                           classification="legislature")
        district_division = (
            "ocd-division/country:us/state:pa/place:pittsburgh/council_district:{}"
        )
        for district_number in range(1, 10):
            org.add_post(label="District {}".format(str(district_number)),
                         role="Councilmember",
                         division_id=district_division.format(district_number))
        yield org

        city_division = "ocd-division/country:us/state:pa/place:pittsburgh"

        mayor = Organization(name="Mayor", classification="executive")
        mayor.add_post("Mayor", "Mayor", division_id=city_division)
        mayor.add_source("http://pittsburghpa.gov/mayor/index.html",
                         note="web")
        yield mayor

        standing_committee = Organization(name="Standing Committee",
                                          classification="committee")
        standing_committee.add_source(
            "http://pittsburghpa.gov/council/standing-committees", note="web")
        yield standing_committee

        # Every name below becomes a "committee" organization whose only
        # source is self.url; the order here is the order they are yielded.
        legistar_committee_names = (
            # There are a number of committees that no longer exist but have
            # old bills attached to them.
            "Committee on Engineering & Construction",
            "Committee on Engineering, Fleet and Forestry",
            "Committee on Facilities, Technology & the Arts",
            "Committee on Finance & Budget",
            "Committee on Finance, Law and Purchasing",
            "Committee on General and Government Services",
            "Committee on General Services & Telecommunications",
            "Committee on General Services, Technology & the Arts",
            "Committee on Housing, Economic Development & Promotion",
            "Committee on Parks, Recreation & Youth Policy",
            "Committee on Planning, Zoning & Land Use",
            "Committee on Public Works & Environmental Services",
            # For whatever reason the clerk's office has also classified
            # these next three as committees in Legistar.
            "Mayor's Agenda - Legislation to be Presented",
            "Post Agenda",
            "Post Agenda Meeting",
            # Further bodies registered here with the committee
            # classification.
            "PUBLIC HEARING SCHEDULE",
            "Executive Session",
        )
        for committee_name in legistar_committee_names:
            committee = Organization(name=committee_name,
                                     classification="committee")
            committee.add_source(self.url, note="web")
            yield committee

        city_clerk = Organization(name="City Clerk",
                                  classification="department")
        city_clerk.add_post("City Clerk", "City Clerk",
                            division_id=city_division)
        city_clerk.add_source("http://pittsburghpa.gov/clerk/", note="web")
        yield city_clerk

        pree = Person(name="Brenda Pree")
        pree_start = datetime.date(2017, 8, 29)
        pree.add_term("City Clerk",
                      "department",
                      start_date=pree_start,
                      appointment=True)
        pree.add_source("http://pittsburghpa.gov/clerk/clerk-bio")
        yield pree

        doheny = Person(name="Mary Beth Doheny")
        doheny_start = datetime.date(2014, 3, 18)
        doheny_end = datetime.date(2017, 8, 28)
        doheny.add_term("City Clerk",
                        "department",
                        start_date=doheny_start,
                        end_date=doheny_end,
                        appointment=True)
        doheny.add_source("http://pittsburghpa.gov")
        yield doheny

        # "All Members", frustratingly, has a Person entry in Pittsburgh
        # Legistar, so the import trips without this. Going strong since 1816!
        all_members = Person(name="All Members")
        all_members.add_term("City Council",
                             "legislature",
                             start_date=datetime.date(1816, 3, 18))
        all_members.add_source("http://pittsburghpa.gov/council/index.html")
        yield all_members