Python Person.add_identifier示例

编程语言: Python

命名空间/包名称: pupa.scrape

类/类型: Person

方法/功能: add_identifier

hotexamples.com的示例: 12

Python Person.add_identifier - 共找到 12 个示例。以下是从开源项目中提取的、评分最高的 pupa.scrape.Person.add_identifier 真实 Python 用法示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示隐藏

Person(30)

add_contact_detail(30)

add_link(30)

add_source(30)

add_membership(23)

add_term(23)

as_dict(21)

extras(12)

add_party(8)

add_identifier(7)

birth_date(5)

add_name(4)

add_contact_deatil(2)

add_member(1)

biography(1)

contact_details(1)

示例#1

显示文件

文件： test_people_importer.py 项目： johnfelipe/pupa

def test_full_person():
    """Import a fully populated scraped person and verify every stored field."""
    scraped = ScrapePerson('Tom Sawyer')
    scraped.add_identifier('1')
    scraped.add_name('Tommy', start_date='1880')
    scraped.add_contact_detail(
        type='phone',
        value='555-555-1234',
        note='this is fake',
    )
    scraped.add_link('http://example.com/link')
    scraped.add_source('http://example.com/source')

    # Run the serialized person through the importer.
    PersonImporter('jurisdiction-id').import_data([scraped.as_dict()])

    # Read the person back from the database and compare field by field.
    stored = Person.objects.get()
    assert 'ocd-person' in stored.id
    assert stored.name == scraped.name

    identifier = stored.identifiers.all()[0]
    assert identifier.identifier == '1'
    assert identifier.scheme == ''

    other_name = stored.other_names.all()[0]
    assert other_name.name == 'Tommy'
    assert other_name.start_date == '1880'

    contact = stored.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    assert stored.links.all()[0].url == 'http://example.com/link'
    assert stored.sources.all()[0].url == 'http://example.com/source'

示例#2

显示文件

文件： test_bill_importer.py 项目： sudhanshuchopra/pupa

def test_bill_sponsor_by_identifier():
    """A sponsorship added by identifier/scheme resolves to the matching person."""
    create_jurisdiction()
    organization = create_org()

    scraped_bill = ScrapeBill(
        'HB 1', '1900', 'Axe & Tack Tax Act',
        classification='tax bill', chamber='lower',
    )
    scraped_bill.add_sponsorship_by_identifier(
        name="SNODGRASS",
        classification='sponsor',
        entity_type='person',
        primary=True,
        identifier="TOTALLY_REAL_ID",
        scheme="TOTALLY_REAL_SCHEME",
    )

    org_importer = OrganizationImporter('jid')
    person_importer = PersonImporter('jid')

    # Import a person carrying the same identifier/scheme as the sponsorship.
    scraped_person = ScrapePerson(name='Zadock Snodgrass')
    scraped_person.add_identifier(
        identifier='TOTALLY_REAL_ID', scheme='TOTALLY_REAL_SCHEME')
    person_importer.import_data([scraped_person.as_dict()])

    # Attach the stored person to the test organization.
    stored_person = Person.objects.get()
    Membership.objects.create(
        person_id=stored_person.id, organization_id=organization.id)

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    stored_bill = Bill.objects.get()
    # Unpacking fails unless there is exactly one sponsorship.
    [sponsorship] = stored_bill.sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"

示例#3

显示文件

文件： test_bill_importer.py 项目： rshorey/pupa

def test_bill_sponsor_by_identifier():
    """Sponsor lookup by identifier: the bill's sponsor must be the imported person."""
    create_jurisdiction()
    org = create_org()

    # Identifier and scheme shared by the sponsorship and the person.
    id_kwargs = {
        'identifier': 'TOTALLY_REAL_ID',
        'scheme': 'TOTALLY_REAL_SCHEME',
    }

    bill = ScrapeBill(
        'HB 1',
        '1900',
        'Axe & Tack Tax Act',
        classification='tax bill',
        chamber='lower',
    )
    bill.add_sponsorship_by_identifier(
        name="SNODGRASS",
        classification='sponsor',
        entity_type='person',
        primary=True,
        **id_kwargs
    )

    importers = (OrganizationImporter('jid'), PersonImporter('jid'))
    people = importers[1]

    snodgrass = ScrapePerson(name='Zadock Snodgrass')
    snodgrass.add_identifier(**id_kwargs)
    people.import_data([snodgrass.as_dict()])

    # Link the stored person to the organization created above.
    Membership.objects.create(
        person_id=Person.objects.get().id,
        organization_id=org.id,
    )

    BillImporter('jid', *importers).import_data([bill.as_dict()])

    # Unpacking fails unless there is exactly one sponsorship.
    (only_sponsorship,) = Bill.objects.get().sponsorships.all()
    assert only_sponsorship.person.name == "Zadock Snodgrass"

示例#4

显示文件

def test_full_person():
    """Round-trip a scraped person through PersonImporter and check the stored rows."""
    name_alias = 'Tommy'
    link_url = 'http://example.com/link'
    source_url = 'http://example.com/source'
    phone = {'type': 'phone', 'value': '555-555-1234', 'note': 'this is fake'}

    tom = ScrapePerson('Tom Sawyer')
    tom.add_identifier('1')
    tom.add_name(name_alias, start_date='1880')
    tom.add_contact_detail(**phone)
    tom.add_link(link_url)
    tom.add_source(source_url)

    # Import the serialized person.
    serialized = tom.as_dict()
    PersonImporter('jurisdiction-id').import_data([serialized])

    # Fetch the stored person and compare each related record.
    db_person = Person.objects.get()
    assert 'ocd-person' in db_person.id
    assert db_person.name == tom.name

    first_identifier = db_person.identifiers.all()[0]
    assert (first_identifier.identifier, first_identifier.scheme) == ('1', '')

    first_alias = db_person.other_names.all()[0]
    assert (first_alias.name, first_alias.start_date) == (name_alias, '1880')

    first_contact = db_person.contact_details.all()[0]
    assert first_contact.type == phone['type']
    assert first_contact.value == phone['value']
    assert first_contact.note == phone['note']

    assert db_person.links.all()[0].url == link_url
    assert db_person.sources.all()[0].url == source_url

示例#5

显示文件

文件： people.py 项目： datamade/hearings

    def scrape(self):
        """Yield one Person per historical legislator with a term ending in 1970 or later.

        The records are read from
        ``congress-legislators/legislators-historical.yaml`` next to this
        module.  Each yielded Person carries one term per record term, one
        party per distinct party, every identifier from the record's ``id``
        mapping, and a source link to the upstream YAML file.
        """
        yaml_path = (Path(__file__).parent
                     / 'congress-legislators/legislators-historical.yaml')

        # NOTE(review): CLoader is a full (non-safe) YAML loader; fine for a
        # vendored data file, but confirm the file is always trusted.
        with yaml_path.open() as handle:
            records = yaml.load(handle, Loader=yaml.CLoader)

        for record in records:
            terms = record['terms']

            # Skip people whose every term ended before 1970 (the 'end'
            # values are compared as strings).
            if not any(term['end'] >= '1970' for term in terms):
                continue

            name_parts = (record['name']['first'], record['name']['last'])
            bio = record['bio']
            person = Person(name=' '.join(name_parts),
                            birth_date=bio.get('birthday', ''),
                            gender=bio['gender'])

            seen_parties = set()
            for term in terms:
                state = term['state']
                seen_parties.add(term['party'])

                is_representative = term['type'] == 'rep'
                if is_representative:
                    role, chamber = 'Representative', 'lower'
                    district_name = self._district_name(state, term['district'])
                else:
                    role, chamber = "Senator", 'upper'
                    district_name = "{state}, Class {klass}".format(
                        state=state, klass=term['class'])

                person.add_term(role, chamber,
                                district=district_name,
                                start_date=term['start'],
                                end_date=term['end'])

            for party in seen_parties:
                person.add_party(party)

            # Identifier values are stringified before being stored.
            for scheme, identifier in record['id'].items():
                person.add_identifier(str(identifier), scheme=scheme)

            person.add_source(
                'https://github.com/unitedstates/congress-legislators/blob/master/legislators-historical.yaml'
            )

            yield person

示例#6

显示文件

文件： test_bill_importer.py 项目： sudhanshuchopra/pupa

def test_bill_sponsor_limit_lookup():
    """Sponsor lookup by identifier must not cross jurisdiction bounds.

    Two people share a name and an identifier but differ in birth date.  The
    first (born 1800) is imported under 'jid' and given a membership in the
    test organization; the second (born 1900) is imported under
    'another-jurisdiction'.  The bill imported under 'jid' must resolve its
    sponsor to the first person, which the birth-date assertion verifies.
    """
    create_jurisdiction()
    org = create_org()

    bill = ScrapeBill('HB 1',
                      '1900',
                      'Axe & Tack Tax Act',
                      classification='tax bill',
                      chamber='lower')
    bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                       classification='sponsor',
                                       entity_type='person',
                                       primary=True,
                                       identifier="TOTALLY_REAL_ID",
                                       scheme="TOTALLY_REAL_SCHEME")

    oi = OrganizationImporter('jid')
    pi = PersonImporter('jid')

    zs = ScrapePerson(name='Zadock Snodgrass', birth_date="1800-01-01")
    zs.add_identifier(identifier='TOTALLY_REAL_ID',
                      scheme='TOTALLY_REAL_SCHEME')
    pi.import_data([zs.as_dict()])

    za_db = Person.objects.get()
    Membership.objects.create(person_id=za_db.id, organization_id=org.id)

    zs2 = ScrapePerson(name='Zadock Snodgrass', birth_date="1900-01-01")
    zs2.add_identifier(identifier='TOTALLY_REAL_ID',
                       scheme='TOTALLY_REAL_SCHEME')

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    # Import the look-alike (zs2) here: re-importing zs would leave both
    # people with the same birth date, so the final assertion could not tell
    # which one the sponsorship resolved to.
    PersonImporter('another-jurisdiction').import_data([zs2.as_dict()])

    BillImporter('jid', oi, pi).import_data([bill.as_dict()])

    obj = Bill.objects.get()
    (entry, ) = obj.sponsorships.all()
    assert entry.person.name == "Zadock Snodgrass"
    assert entry.person.birth_date == "1800-01-01"

示例#7

显示文件

文件： test_bill_importer.py 项目： rshorey/pupa

def test_bill_sponsor_limit_lookup():
    """Identifier-based sponsor lookup must stay inside the bill's jurisdiction.

    A person born 1800 is imported under 'jid' and attached to the test
    organization; a second person with the same name and identifier but born
    1900 is imported under 'another-jurisdiction'.  After importing the bill
    under 'jid', its single sponsorship must point at the 1800 person.
    """
    create_jurisdiction()
    org = create_org()

    bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                      classification='tax bill', chamber='lower')
    bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                       classification='sponsor',
                                       entity_type='person',
                                       primary=True,
                                       identifier="TOTALLY_REAL_ID",
                                       scheme="TOTALLY_REAL_SCHEME")

    oi = OrganizationImporter('jid')
    pi = PersonImporter('jid')

    zs = ScrapePerson(name='Zadock Snodgrass', birth_date="1800-01-01")
    zs.add_identifier(identifier='TOTALLY_REAL_ID',
                      scheme='TOTALLY_REAL_SCHEME')
    pi.import_data([zs.as_dict()])

    za_db = Person.objects.get()
    Membership.objects.create(person_id=za_db.id,
                              organization_id=org.id)

    zs2 = ScrapePerson(name='Zadock Snodgrass', birth_date="1900-01-01")
    zs2.add_identifier(identifier='TOTALLY_REAL_ID',
                       scheme='TOTALLY_REAL_SCHEME')

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    # The other jurisdiction gets zs2 (the 1900 look-alike); importing zs
    # again would make the birth-date assertion below unable to distinguish
    # the two people.
    PersonImporter('another-jurisdiction').import_data([zs2.as_dict()])

    BillImporter('jid', oi, pi).import_data([bill.as_dict()])

    obj = Bill.objects.get()
    (entry,) = obj.sponsorships.all()
    assert entry.person.name == "Zadock Snodgrass"
    assert entry.person.birth_date == "1800-01-01"

示例#8

显示文件

文件： people.py 项目： opencivicdata/scrapers-us-state

    def scrape_legislator(self, legislator_id):
        """Convert one legacy API legislator record into a Person.

        Fetches ``'legislators/' + legislator_id + '?'`` through ``self.api``
        and consumes the returned record key by key (assumed to be a dict):
        every known key is popped and either copied onto a new Person or
        discarded.  The final assert fails if any key was left unhandled.

        Keys popped without a default are required; with a plain dict a
        missing one raises KeyError.

        Returns the populated Person.
        """
        old = self.api('legislators/' + legislator_id + '?')
        # NOTE(review): `id` shadows the builtin and is rebound by the
        # `for id in old.pop('all_ids')` loop further down.
        id = old.pop('id')

        # bookkeeping keys that are not carried over
        old.pop('created_at')
        old.pop('updated_at')
        old.pop('country', None)
        old.pop('level', None)
        old.pop('state')
        old.pop('leg_id')
        old.pop('active')
        # junk keys
        old.pop('suffix', None)
        old.pop('notice', None)
        old.pop('csrfmiddlewaretoken', None)
        old.pop('office_address', None)
        old.pop('office_phone', None)

        # translated
        district = old.pop('district', None)
        chamber = old.pop('chamber', None)
        image = old.pop('photo_url', '')
        name = old.pop('full_name')
        party = old.pop('party', None)

        # normalize party: these placeholder values mean "no party"
        if party in ('Nonpartisan', 'unknown', 'Unknown', 'Unaffiliated', "Non Affiliated", " "):
            party = None
        elif party == 'Democrat':
            party = 'Democratic'

        if self.state in('ne', 'dc'):
            chamber = 'legislature'

        # 'pr' upper-chamber districts are renamed from digits to Roman numerals
        if chamber == 'upper' and self.state == 'pr':
            pr_district = {
                '1': 'I',
                '2': 'II',
                '3': 'III',
                '4': 'IV',
                '5': 'V',
                '6': 'VI',
                '7': 'VII',
                '8': 'VIII',
            }
            if district in pr_district:
                district = pr_district[district]

        # fold a stray top-level '2008-2011' key into old_roles
        # NOTE(review): this indexes old['old_roles'] directly, so it needs
        # that key to exist whenever '2008-2011' is present.
        if '2008-2011' in old:
            old['old_roles']['2008-2011'] = old.pop('2008-2011')

        old_roles = old.pop('old_roles', {})

        # A current 'Lt. Governor' role gets a post on the executive
        # organization plus a Membership attached to the person; only this
        # branch passes `district` to Person.
        if old['roles'] and 'Lt. Governor' in [x['type'] for x in old['roles']]:
            new = Person(name=name, district=district, party=party, image=image)
            self.jurisdiction._executive.add_post(
                'Lt. Governor',
                'lt-gov'
            )
            membership = Membership(
                person_id=new._id,
                role="Lt. Governor",
                organization_id=self.jurisdiction._executive._id
            )
            new._related.append(membership)
        else:
            new = Person(name=name, party=party, image=image)

        # `birthdays` is not defined in this method; presumably a module-level
        # mapping keyed by legislator id -- verify.
        if id in birthdays:
            new.birth_date = birthdays[id]

        # various ids: record key -> identifier scheme
        id_types = {'votesmart_id': 'votesmart',
                    'transparencydata_id': 'influence-explorer',
                    'nimsp_id': 'nimsp',
                    'nimsp_candidate_id': 'nimsp-candidate',
                   }
        for idname, scheme in id_types.items():
            val = old.pop(idname, None)
            if val:
                new.add_identifier(val, scheme=scheme)
        # Every entry of all_ids becomes an 'openstates' identifier and is
        # registered in self._people.
        # NOTE(review): this loop rebinds `id`, so the process_role calls
        # below receive the last entry of all_ids (when non-empty) rather than
        # the value popped from 'id' -- confirm this is intended.
        for id in old.pop('all_ids'):
            new.add_identifier(id, scheme='openstates')
            self._people[id] = new

        # contact details
        email = old.pop('email', None)
        if email:
            new.add_contact_detail(type='email', value=email, note='')
        # office record key -> contact detail type
        office_keys = {'fax': 'fax',
                       'phone': 'voice',
                       'email': 'email',
                       'address': 'address'}
        for office in old.pop('offices'):
            for key, type in office_keys.items():
                if office.get(key):
                    # for 'pa', values containing 'Office Hours' are split on
                    # 'Office Hours: ' and each non-empty piece is added separately
                    if 'Office Hours' in office[key] and self.state == 'pa':
                        for x in office[key].split('Office Hours: '):
                            if x:
                                new.add_contact_detail(type=type, value=x, note=office['name'])
                    else:
                        new.add_contact_detail(type=type, value=office[key], note=office['name'])

        # links
        link = old.pop('url', None)
        if link:
            new.add_link(link)

        # for utah, conflict of interest is in links
        # ('+links' is only popped for 'ut'; for any other state a '+links'
        # key would survive to the final assert)
        if self.state == 'ut':
            links = old.pop('+links',[])
            for l in links:
                new.add_link(note="conflict of interest form",url=l)

        # sources: drop 'retrieved' and '+page', pass the rest as keyword args
        for source in old.pop('sources'):
            source.pop('retrieved', None)
            source.pop('+page', None)
            new.add_source(**source)

        # roles: current ones first, then every list stored under old_roles
        for role in old.pop('roles'):
            self.process_role(new, role, leg_id=id)

        for role_list in old_roles.values():
            for role in role_list:
                self.process_role(new, role, leg_id=id)

        # ignore most of the names for now; only last_name is kept, as sort_name
        old.pop('first_name')
        old.pop('middle_name')
        old.pop('suffixes')
        old.pop('nickname', None)
        new.sort_name = old.pop('last_name')

        # some places have legacy names without underscores
        old.pop('+firstname', None)
        old.pop('+lastname', None)

        # optional '+'-prefixed attributes are copied only when truthy
        gender = old.pop('+gender', None)
        if gender:
            new.gender = gender
        biography = old.pop('+biography', None)
        if biography:
            new.biography = biography
        birth_date = old.pop('+birth_date', None)
        if birth_date:
            new.birth_date = birth_date

        # keys to keep: stored in new.extras with every '+' removed from the key
        to_extras = ['+occupation', '+twitter', '+facebook_url', '+sworn_in_date', '+profession',
                     '+secretary', '+office_hours', '+resident_county', '+district_name',
                     '+leg_status', '+legal_position', '+title', '+start_year',
                     '+end_date', 'occupation', '+oregon_member_id',
                     '+facebook', '+youtube', '+instagram']
        for k in to_extras:
            v = old.pop(k, None)
            if v:
                new.extras[k.replace('+', '')] = v

        # keys not to keep: popped and discarded
        to_pop = ['+office_fax', '+phone', '+room', '+fax', '+email', '+url', '+photo', '+notice',
                  '+page', '+suffix', '+city', '+address', '+additional_info_url', '+contact_form',
                  '+fax_number', '+phone_number', '+business_phone', '+email_address', '+img_url',
                  '+office_phone', '+disctict_name', '+office_loc', '+leg_url', '+office',
                  '+district_address', '+capital_address', '+bis_phone', '+capital_phone',
                  '+org_info', '+role', '+other_phone', '+home_phone', '+zip', '+zipcode',
                  '+county', '+capitol_phone', '+image_url', '+header', '+town_represented',
                  '+full_address', '+capitol_address', '+website', '+district_phone',
                  '+district_offices', '+party', '+district', '+capitol_office', '+office_address',
                 ]
        for k in to_pop:
            old.pop(k, None)

        # ensure we got it all: any key still in `old` was never handled above
        assert not old, old.keys()

        return new

示例#9

显示文件

文件： legislative.py 项目： crdunwel/scrapers-us-federal

    def scrape_current_legislators(self, repos):
        """Yield posts, memberships and people for every legislator in ``repos``.

        For each entry of ``repos`` the URL is resolved with ``self.get_url``
        and parsed with ``self.yamlize``.  For every person record this
        generator yields, in order:

        * a Post the first time a division id is seen within that repo,
        * a chamber Membership for each term,
        * a party Membership for each term that names a party
          ('Democrat' is normalized to 'Democratic'),
        * the Person itself, but only if the record had at least one term.

        A record without a ``bio.birthday`` gets an empty birth date.
        """
        for repo in repos:
            CURRENT_LEGISLATORS = self.get_url(repo)

            people = self.yamlize(CURRENT_LEGISLATORS)
            # division id -> Post, so each post is created and yielded once
            posts = {}

            for person in people:
                name = person['name'].get('official_full')
                if name is None:
                    name = "{name[first]} {name[last]}".format(**person)

                # Default to '' when the record has no birthday, so the value
                # never leaks from the previous record and is always bound.
                birth_date = person['bio'].get('birthday', '')

                who = Person(name=name, birth_date=birth_date)
                who.add_source(url=CURRENT_LEGISLATORS,
                               note="unitedstates project on GitHub")
                has_term = False

                for term in person.get('terms', []):
                    has_term = True
                    start_date = term['start']
                    end_date = term['end']
                    state = term['state']
                    type_ = term['type']
                    district = term.get('district', None)
                    party = term.get('party', None)

                    # Both lookups raise KeyError for any type other than
                    # 'rep' or 'sen'.
                    chamber = {'rep': self.house,
                               'sen': self.senate}[type_]

                    role = {'rep': 'Representative',
                            'sen': 'Senator'}[type_]

                    # NOTE(review): a 'rep' term with no district sets neither
                    # `label` nor `division_id`; it would reuse the previous
                    # term's values or fail -- confirm the data rules it out.
                    if type_ == "rep" and district is not None:
                        label = "%s for District %s in %s" % (role, district, state)

                        division_id = ("ocd-division/country:us/state:{state}".format(state=state.lower()))

                        # district 0 keeps the state-level division id
                        if district != 0:
                            division_id += "/cd:{district}".format(district=district)

                    if type_ == "sen":
                        label = "Senator for %s" % state

                        division_id = ("ocd-division/country:us/state:{state}".format(state=state.lower()))

                    post = posts.get(division_id)
                    if post is None:
                        post = Post(organization_id=chamber._id,
                                    division_id=division_id,
                                    label=label, role=role)
                        posts[division_id] = post
                        yield post

                    membership = Membership(
                        post_id=post._id,
                        role=role,
                        label=label,
                        start_date=start_date,
                        end_date=end_date,
                        person_id=who._id,
                        organization_id=chamber._id)
                    yield membership

                    if party == "Democrat":
                        party = "Democratic"

                    if party:
                        # the party organization is referenced by pseudo id
                        membership = Membership(
                            role='member',
                            start_date=start_date,
                            end_date=end_date,
                            person_id=who._id,
                            organization_id=make_pseudo_id(
                                classification="party",
                                name=party))
                        yield membership

                # List-valued ids yield one identifier per element; scalar ids
                # yield one, and a scalar 'bioguide' id also sets the image URL.
                for key, value in person.get('id', {}).items():
                    if isinstance(value, list):
                        for v in value:
                            who.add_identifier(str(v), scheme=key)
                    else:
                        who.add_identifier(str(value), scheme=key)
                        if key == 'bioguide':
                            who.image = self.get_image_url(str(value))

                if has_term:
                    yield who

示例#10

显示文件

    def scrape_legislator(self, legislator_id):
        """Convert one legacy API legislator record into a Person.

        Fetches ``'legislators/' + legislator_id + '?'`` through ``self.api``
        and consumes the returned record key by key (assumed to be a dict):
        every known key is popped and either copied onto a new Person or
        discarded.  The final assert fails if any key was left unhandled.

        Keys popped without a default are required; with a plain dict a
        missing one raises KeyError.

        Returns the populated Person.
        """
        old = self.api('legislators/' + legislator_id + '?')
        # NOTE(review): `id` shadows the builtin and is rebound by the
        # `for id in old.pop('all_ids')` loop further down.
        id = old.pop('id')

        # bookkeeping keys that are not carried over
        old.pop('created_at')
        old.pop('updated_at')
        old.pop('country', None)
        old.pop('level', None)
        old.pop('state')
        old.pop('leg_id')
        old.pop('active')
        # junk keys
        old.pop('suffix', None)
        old.pop('notice', None)
        old.pop('csrfmiddlewaretoken', None)
        old.pop('office_address', None)
        old.pop('office_phone', None)

        # translated
        district = old.pop('district', None)
        chamber = old.pop('chamber', None)
        image = old.pop('photo_url', '')
        name = old.pop('full_name')
        party = old.pop('party', None)

        # normalize party: these placeholder values mean "no party"
        if party in ('Nonpartisan', 'unknown', 'Unknown', 'Unaffiliated',
                     "Non Affiliated", " "):
            party = None
        elif party == 'Democrat':
            party = 'Democratic'

        if self.state in ('ne', 'dc'):
            chamber = 'legislature'

        # 'pr' upper-chamber districts are renamed from digits to Roman numerals
        if chamber == 'upper' and self.state == 'pr':
            pr_district = {
                '1': 'I',
                '2': 'II',
                '3': 'III',
                '4': 'IV',
                '5': 'V',
                '6': 'VI',
                '7': 'VII',
                '8': 'VIII',
            }
            if district in pr_district:
                district = pr_district[district]

        # fold a stray top-level '2008-2011' key into old_roles
        # NOTE(review): this indexes old['old_roles'] directly, so it needs
        # that key to exist whenever '2008-2011' is present.
        if '2008-2011' in old:
            old['old_roles']['2008-2011'] = old.pop('2008-2011')

        old_roles = old.pop('old_roles', {})

        # A current 'Lt. Governor' role gets a post on the executive
        # organization plus a Membership attached to the person; only this
        # branch passes `district` to Person.
        if old['roles'] and 'Lt. Governor' in [
                x['type'] for x in old['roles']
        ]:
            new = Person(name=name,
                         district=district,
                         party=party,
                         image=image)
            self.jurisdiction._executive.add_post('Lt. Governor', 'lt-gov')
            membership = Membership(
                person_id=new._id,
                role="Lt. Governor",
                organization_id=self.jurisdiction._executive._id)
            new._related.append(membership)
        else:
            new = Person(name=name, party=party, image=image)

        # `birthdays` is not defined in this method; presumably a module-level
        # mapping keyed by legislator id -- verify.
        if id in birthdays:
            new.birth_date = birthdays[id]

        # various ids: record key -> identifier scheme
        id_types = {
            'votesmart_id': 'votesmart',
            'transparencydata_id': 'influence-explorer',
            'nimsp_id': 'nimsp',
            'nimsp_candidate_id': 'nimsp-candidate',
        }
        for idname, scheme in id_types.items():
            val = old.pop(idname, None)
            if val:
                new.add_identifier(val, scheme=scheme)
        # Every entry of all_ids becomes an 'openstates' identifier and is
        # registered in self._people.
        # NOTE(review): this loop rebinds `id`, so the process_role calls
        # below receive the last entry of all_ids (when non-empty) rather than
        # the value popped from 'id' -- confirm this is intended.
        for id in old.pop('all_ids'):
            new.add_identifier(id, scheme='openstates')
            self._people[id] = new

        # contact details
        email = old.pop('email', None)
        if email:
            new.add_contact_detail(type='email', value=email, note='')
        # office record key -> contact detail type
        office_keys = {
            'fax': 'fax',
            'phone': 'voice',
            'email': 'email',
            'address': 'address'
        }
        for office in old.pop('offices'):
            for key, type in office_keys.items():
                if office.get(key):
                    # for 'pa', values containing 'Office Hours' are split on
                    # 'Office Hours: ' and each non-empty piece is added separately
                    if 'Office Hours' in office[key] and self.state == 'pa':
                        for x in office[key].split('Office Hours: '):
                            if x:
                                new.add_contact_detail(type=type,
                                                       value=x,
                                                       note=office['name'])
                    else:
                        new.add_contact_detail(type=type,
                                               value=office[key],
                                               note=office['name'])

        # links
        link = old.pop('url', None)
        if link:
            new.add_link(link)

        # for utah, conflict of interest is in links
        # ('+links' is only popped for 'ut'; for any other state a '+links'
        # key would survive to the final assert)
        if self.state == 'ut':
            links = old.pop('+links', [])
            for l in links:
                new.add_link(note="conflict of interest form", url=l)

        # sources: drop 'retrieved' and '+page', pass the rest as keyword args
        for source in old.pop('sources'):
            source.pop('retrieved', None)
            source.pop('+page', None)
            new.add_source(**source)

        # roles: current ones first, then every list stored under old_roles
        for role in old.pop('roles'):
            self.process_role(new, role, leg_id=id)

        for role_list in old_roles.values():
            for role in role_list:
                self.process_role(new, role, leg_id=id)

        # ignore most of the names for now; only last_name is kept, as sort_name
        old.pop('first_name')
        old.pop('middle_name')
        old.pop('suffixes')
        old.pop('nickname', None)
        new.sort_name = old.pop('last_name')

        # some places have legacy names without underscores
        old.pop('+firstname', None)
        old.pop('+lastname', None)

        # optional '+'-prefixed attributes are copied only when truthy
        gender = old.pop('+gender', None)
        if gender:
            new.gender = gender
        biography = old.pop('+biography', None)
        if biography:
            new.biography = biography
        birth_date = old.pop('+birth_date', None)
        if birth_date:
            new.birth_date = birth_date

        # keys to keep: stored in new.extras with every '+' removed from the key
        to_extras = [
            '+occupation', '+twitter', '+facebook_url', '+sworn_in_date',
            '+profession', '+secretary', '+office_hours', '+resident_county',
            '+district_name', '+leg_status', '+legal_position', '+title',
            '+start_year', '+end_date', 'occupation', '+oregon_member_id',
            '+facebook', '+youtube', '+instagram'
        ]
        for k in to_extras:
            v = old.pop(k, None)
            if v:
                new.extras[k.replace('+', '')] = v

        # keys not to keep: popped and discarded
        to_pop = [
            '+office_fax',
            '+phone',
            '+room',
            '+fax',
            '+email',
            '+url',
            '+photo',
            '+notice',
            '+page',
            '+suffix',
            '+city',
            '+address',
            '+additional_info_url',
            '+contact_form',
            '+fax_number',
            '+phone_number',
            '+business_phone',
            '+email_address',
            '+img_url',
            '+office_phone',
            '+disctict_name',
            '+office_loc',
            '+leg_url',
            '+office',
            '+district_address',
            '+capital_address',
            '+bis_phone',
            '+capital_phone',
            '+org_info',
            '+role',
            '+other_phone',
            '+home_phone',
            '+zip',
            '+zipcode',
            '+county',
            '+capitol_phone',
            '+image_url',
            '+header',
            '+town_represented',
            '+full_address',
            '+capitol_address',
            '+website',
            '+district_phone',
            '+district_offices',
            '+party',
            '+district',
            '+capitol_office',
            '+office_address',
        ]
        for k in to_pop:
            old.pop(k, None)

        # ensure we got it all: any key still in `old` was never handled above
        assert not old, old.keys()

        return new

示例#11

显示文件

    def transform_parse(self, parsed_form, response):

        _source = {
            "url": response.url,
            "note": "LDA Form LD-1"
        }

        # basic disclosure fields
        _disclosure = Disclosure(
            effective_date=datetime.strptime(
                parsed_form['datetimes']['effective_date'],
                '%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
            timezone='America/New_York',
            submitted_date=datetime.strptime(
                parsed_form['datetimes']['signature_date'],
                '%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
            classification="lobbying"
        )

        _disclosure.add_authority(name=self.authority.name,
                                  type=self.authority._type,
                                  id=self.authority._id)

        _disclosure.add_identifier(
            identifier=parsed_form['_meta']['document_id'],
            scheme="urn:sopr:filing"
        )

        # disclosure extras
        _disclosure.extras = {}
        _disclosure.extras['registrant'] = {
            'self_employed_individual': parsed_form['registrant']['self_employed_individual'],
            'general_description': parsed_form['registrant']['registrant_general_description'],
            'signature': {
                "signature_date": parsed_form['datetimes']['signature_date'],
                "signature": parsed_form['signature']
            }
        }

        _disclosure.extras['client'] = {
            'same_as_registrant':
                parsed_form['client']['client_self'],
            'general_description':
                parsed_form['client']['client_general_description']
        }

        _disclosure.extras['registration_type'] = {
            'is_amendment':
                parsed_form['registration_type']['is_amendment'],
            'new_registrant':
                parsed_form['registration_type']['new_registrant'],
            'new_client_for_existing_registrant':
                parsed_form['registration_type'][
                    'new_client_for_existing_registrant'],
        }

        # # Registrant
        # build registrant
        _registrant_self_employment = None

        if parsed_form['registrant']['self_employed_individual']:
            n = ' '.join([p for p in [
                parsed_form['registrant']['registrant_individual_prefix'],
                parsed_form['registrant']['registrant_individual_firstname'],
                parsed_form['registrant']['registrant_individual_lastname']
            ] if len(p) > 0]).strip()

            _registrant = Person(
                name=n,
                source_identified=True
            )

            _registrant_self_employment = Organization(
                name='SELF-EMPLOYMENT of {n}'.format(n=n),
                classification='company',
                source_identified=True
            )

            _registrant.add_membership(
                organization=_registrant_self_employment,
                role='self_employed',
                label='self-employment of {n}'.format(n=n),
                start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
            )
        else:
            _registrant = Organization(
                name=parsed_form['registrant']['registrant_org_name'],
                classification='company',
                source_identified=True
            )

        if len(parsed_form['registrant']['registrant_house_id']) > 0:
            _registrant.add_identifier(
                identifier=parsed_form['registrant']['registrant_house_id'],
                scheme='urn:house_clerk:registrant'
            )

        if len(parsed_form['registrant']['registrant_senate_id']) > 0:
            _registrant.add_identifier(
                identifier=parsed_form['registrant']['registrant_senate_id'],
                scheme='urn:sopr:registrant'
            )

        registrant_contact_details = [
            {
                "type": "address",
                "note": "contact address",
                "value": '; '.join([
                    p for p in [
                        parsed_form['registrant']['registrant_address_one'],
                        parsed_form['registrant']['registrant_address_two'],
                        parsed_form['registrant']['registrant_city'],
                        parsed_form['registrant']['registrant_state'],
                        parsed_form['registrant']['registrant_zip'],
                        parsed_form['registrant']['registrant_country']]
                    if len(p) > 0]).strip(),
            },
            {
                "type": "voice",
                "note": "contact phone",
                "value": parsed_form['registrant']['registrant_contact_phone'],
            },
            {
                "type": "email",
                "note": "contact email",
                "value": parsed_form['registrant']['registrant_contact_email'],
            },
        ]

        registrant_contact_ppb = {
            "type": "address",
            "note": "principal place of business",
            "value": '; '.join([
                p for p in [
                    parsed_form['registrant']['registrant_ppb_city'],
                    parsed_form['registrant']['registrant_ppb_state'],
                    parsed_form['registrant']['registrant_ppb_zip'],
                    parsed_form['registrant']['registrant_ppb_country']]
                if len(p) > 0]).strip(),
        }

        if registrant_contact_ppb["value"]:
            registrant_contact_details.append(registrant_contact_ppb)

        for cd in registrant_contact_details:
            _registrant.add_contact_detail(**cd)

        _registrant.extras = {
            "contact_details_structured": [
                {
                    "type": "address",
                    "note": "contact address",
                    "parts": [
                        {
                            "note": "address_one",
                            "value": parsed_form['registrant'][
                                'registrant_address_one'],
                        },
                        {
                            "note": "address_two",
                            "value": parsed_form['registrant'][
                                'registrant_address_two'],
                        },
                        {
                            "note": "city",
                            "value": parsed_form['registrant'][
                                'registrant_city'],
                        },
                        {
                            "note": "state",
                            "value": parsed_form['registrant'][
                                'registrant_state'],
                        },
                        {
                            "note": "zip",
                            "value": parsed_form['registrant'][
                                'registrant_zip'],
                        },
                        {
                            "note": "country",
                            "value": parsed_form['registrant'][
                                'registrant_country'],
                        }
                    ],
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "parts": [
                        {
                            "note": "city",
                            "value": parsed_form['registrant'][
                                'registrant_ppb_city'],
                        },
                        {
                            "note": "state",
                            "value": parsed_form['registrant'][
                                'registrant_ppb_state'],
                        },
                        {
                            "note": "zip",
                            "value": parsed_form['registrant'][
                                'registrant_ppb_zip'],
                        },
                        {
                            "note": "country",
                            "value": parsed_form['registrant'][
                                'registrant_ppb_country'],
                        }
                    ],
                },
            ]
        }

        # # People
        # build contact
        _main_contact = Person(
            name=parsed_form['registrant']['registrant_contact_name'],
            source_identified=True
        )

        main_contact_contact_details = [
            {
                "type": "voice",
                "note": "contact phone",
                "value": parsed_form['registrant']['registrant_contact_phone'],
            },
            {
                "type": "email",
                "note": "contact email",
                "value": parsed_form['registrant']['registrant_contact_email'],
            }
        ]

        for cd in main_contact_contact_details:
            _main_contact.add_contact_detail(**cd)

        if _registrant._type == 'organization':
            _registrant.add_member(
                name_or_person=_main_contact,
                role='main_contact',
                label='main contact for {n}'.format(n=_registrant.name),
                start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
            )
        else:
            _registrant_self_employment.add_member(
                name_or_person=_main_contact,
                role='main_contact',
                label='main contact for {n}'.format(n=_registrant.name),
                start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
            )

        # # Client
        # build client
        _client = Organization(
            name=parsed_form['client']['client_name'],
            classification='company',
            source_identified=True
        )

        client_contact_details = [
            {
                "type": "address",
                "note": "contact address",
                "value": '; '.join([
                    p for p in [
                        parsed_form['client']['client_address'],
                        parsed_form['client']['client_city'],
                        parsed_form['client']['client_state'],
                        parsed_form['client']['client_zip'],
                        parsed_form['client']['client_country']]
                    if len(p) > 0]).strip(),
            },
        ]

        client_contact_ppb = {
            "type": "address",
            "note": "principal place of business",
            "value": '; '.join([
                p for p in [
                    parsed_form['client']['client_ppb_city'],
                    parsed_form['client']['client_ppb_state'],
                    parsed_form['client']['client_ppb_zip'],
                    parsed_form['client']['client_ppb_country']]
                if len(p) > 0]).strip(),
        }

        if client_contact_ppb["value"]:
            client_contact_details.append(client_contact_ppb)

        for cd in client_contact_details:
            _client.add_contact_detail(**cd)

        _client.extras = {
            "contact_details_structured": [
                {
                    "type": "address",
                    "note": "contact address",
                    "parts": [
                        {
                            "note": "address",
                            "value": parsed_form['client']['client_address'],
                        },
                        {
                            "note": "city",
                            "value": parsed_form['client']['client_city'],
                        },
                        {
                            "note": "state",
                            "value": parsed_form['client']['client_state'],
                        },
                        {
                            "note": "zip",
                            "value": parsed_form['client']['client_zip'],
                        },
                        {
                            "note": "country",
                            "value": parsed_form['client']['client_country'],
                        }
                    ],
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "parts": [
                        {
                            "note": "city",
                            "value": parsed_form['client']['client_ppb_city'],
                        },
                        {
                            "note": "state",
                            "value": parsed_form['client']['client_ppb_state'],
                        },
                        {
                            "note": "zip",
                            "value": parsed_form['client']['client_ppb_zip'],
                        },
                        {
                            "note": "country",
                            "value": parsed_form['client'][
                                'client_ppb_country'],
                        }
                    ],
                },
            ],
        }

        # Collect Foreign Entities
        _foreign_entities = []
        _foreign_entities_by_name = {}
        for fe in parsed_form['foreign_entities']:
            fe_extras = {}
            fe_name = fe['foreign_entity_name']

            # check for name-based duplicates
            if fe_name in _foreign_entities_by_name:
                _foreign_entity = _foreign_entities_by_name[fe_name]
            else:
                _foreign_entity = Organization(
                    name=fe_name,
                    classification='company',
                    source_identified=True
                )

            # collect contact details
            foreign_entity_contact_details = [
                {
                    "type": "address",
                    "note": "contact address",
                    "value": '; '.join([
                        p for p in [
                            fe['foreign_entity_address'],
                            fe['foreign_entity_city'],
                            fe['foreign_entity_state'],
                            fe['foreign_entity_country']]
                        if len(p) > 0]).strip(),
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "value": '; '.join([
                        p for p in [
                            fe['foreign_entity_ppb_state'],
                            fe['foreign_entity_ppb_country']]
                        if len(p) > 0]).strip(),
                },
            ]

            foreign_entity_contact_ppb = {
                "type": "address",
                "note": "principal place of business",
                "value": '; '.join([
                    p for p in [
                        fe['foreign_entity_ppb_city'],
                        fe['foreign_entity_ppb_state'],
                        fe['foreign_entity_ppb_country']]
                    if len(p) > 0]),
            }

            if foreign_entity_contact_ppb["value"]:
                foreign_entity_contact_details.append(
                    foreign_entity_contact_ppb)

            # add contact details
            for cd in foreign_entity_contact_details:
                if cd['value'] != '':
                    _foreign_entity.add_contact_detail(**cd)

            # add extras
            fe_extras["contact_details_structured"] = [
                {
                    "type": "address",
                    "note": "contact address",
                    "parts": [
                        {
                            "note": "address",
                            "value": fe['foreign_entity_address'],
                        },
                        {
                            "note": "city",
                            "value": fe['foreign_entity_city'],
                        },
                        {
                            "note": "state",
                            "value": fe['foreign_entity_state'],
                        },
                        {
                            "note": "country",
                            "value": fe['foreign_entity_country'],
                        }
                    ],
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "parts": [
                        {
                            "note": "state",
                            "value": fe['foreign_entity_ppb_state'],
                        },
                        {
                            "note": "country",
                            "value": fe['foreign_entity_ppb_country'],
                        }
                    ],
                },
            ]

            _foreign_entity.extras = combine_dicts(_foreign_entity.extras,
                                                   fe_extras)

            _foreign_entities_by_name[fe_name] = _foreign_entity

        for unique_foreign_entity in _foreign_entities_by_name.values():
            _foreign_entities.append(unique_foreign_entity)

            # TODO: add a variant on memberships to represent inter-org
            # relationships (associations, ownership, etc)
            #
            # _client['memberships'].append({
            #     "id": _foreign_entity['id'],
            #     "classification": "organization",
            #     "name": _foreign_entity['name'],
            #     "extras": {
            #         "ownership_percentage":
            #             fe['foreign_entity_amount']
            #     }
            # })

        # Collect Lobbyists
        # TODO: deal with weird non-name line continuation cases (blanks, "continued")
        _lobbyists_by_name = {}

        for l in parsed_form['lobbyists']:
            l_extras = {}
            l_name = ' '.join([l['lobbyist_first_name'],
                               l['lobbyist_last_name'],
                               l['lobbyist_suffix']
                               ]).strip()

            if l_name in _lobbyists_by_name:
                _lobbyist = _lobbyists_by_name[l_name]
            else:
                _lobbyist = Person(
                    name=l_name,
                    source_identified=True
                )

            if l['lobbyist_covered_official_position']:
                l_extras['lda_covered_official_positions'] = [
                    {
                        'date_reported':
                            parsed_form['datetimes']['effective_date'],
                        'covered_official_position':
                            l['lobbyist_covered_official_position']
                    },
                ]

            _lobbyist.extras = combine_dicts(_lobbyist.extras, l_extras)

            _lobbyists_by_name[l_name] = _lobbyist

        _lobbyists = []
        for unique_lobbyist in _lobbyists_by_name.values():
            _lobbyists.append(unique_lobbyist)

        if _registrant._type == 'organization':
            for l in _lobbyists:
                _registrant.add_member(
                    l,
                    role='lobbyist',
                    label='lobbyist for {n}'.format(n=_registrant.name),
                    start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
                )
        else:
            for l in _lobbyists:
                _registrant_self_employment.add_member(
                    l,
                    role='lobbyist',
                    label='lobbyist for {n}'.format(n=_registrant.name),
                    start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
                )

        # # Document
        # build document
        _disclosure.add_document(
            note='submitted filing',
            date=parsed_form['datetimes']['effective_date'][:10],
            url=response.url
        )

        # Collect Affiliated orgs
        _affiliated_organizations = []
        _affiliated_organizations_by_name = {}
        for ao in parsed_form['affiliated_organizations']:
            ao_extras = {}
            ao_name = ao['affiliated_organization_name']
            if ao_name in _affiliated_organizations_by_name:
                # There's already one by this name
                _affiliated_organization = _affiliated_organizations_by_name[ao_name]
            else:
                # New affiliated org
                _affiliated_organization = Organization(
                    name=ao_name,
                    classification='company',
                    source_identified=True
                )

            # collect contact details
            affiliated_organization_contact_details = [
                {
                    "type": "address",
                    "note": "contact address",
                    "value": '; '.join([
                        p for p in [
                            ao['affiliated_organization_address'],
                            ao['affiliated_organization_city'],
                            ao['affiliated_organization_state'],
                            ao['affiliated_organization_zip'],
                            ao['affiliated_organization_country']]
                        if len(p) > 0]).strip(),
                },
            ]

            affiliated_organization_contact_ppb = {
                "type": "address",
                "note": "principal place of business",
                "value": '; '.join([
                    p for p in [
                        ao['affiliated_organization_ppb_city'],
                        ao['affiliated_organization_ppb_state'],
                        ao['affiliated_organization_ppb_country']]
                    if len(p) > 0]).strip(),
            }

            if affiliated_organization_contact_ppb["value"]:
                affiliated_organization_contact_details.append(
                    affiliated_organization_contact_ppb)

            # add contact details
            for cd in affiliated_organization_contact_details:
                _affiliated_organization.add_contact_detail(**cd)

            ao_extras["contact_details_structured"] = [
                {
                    "type": "address",
                    "note": "contact address",
                    "parts": [
                        {
                            "note": "address",
                            "value": ao['affiliated_organization_address'],
                        },
                        {
                            "note": "city",
                            "value": ao['affiliated_organization_city'],
                        },
                        {
                            "note": "state",
                            "value": ao['affiliated_organization_state'],
                        },
                        {
                            "note": "zip",
                            "value": ao['affiliated_organization_zip'],
                        },
                        {
                            "note": "country",
                            "value": ao['affiliated_organization_country'],
                        }
                    ],
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "parts": [
                        {
                            "note": "city",
                            "value":
                                ao['affiliated_organization_ppb_city'],
                        },
                        {
                            "note": "state",
                            "value":
                                ao['affiliated_organization_ppb_state'],
                        },
                        {
                            "note": "country",
                            "value":
                                ao['affiliated_organization_ppb_country'],
                        }
                    ],
                },
            ],

            _affiliated_organization.extras = combine_dicts(
                _affiliated_organization.extras, ao_extras)

        for unique_affiliated_organization in _affiliated_organizations_by_name.values():
            _affiliated_organizations.append(unique_affiliated_organization)

        # # Events & Agendas
        # name
        if parsed_form['registration_type']['new_registrant']:
            registration_type = 'New Client, New Registrant'
        elif parsed_form['registration_type']['is_amendment']:
            registration_type = 'Amended Registration'
        else:
            registration_type = 'New Client for Existing Registrant'

        # Create registration event
        _event = Event(
            name="{rn} - {rt}, {cn}".format(rn=_registrant.name,
                                            rt=registration_type,
                                            cn=_client.name),
            timezone='America/New_York',
            location='United States',
            start_time=datetime.strptime(
                parsed_form['datetimes']['effective_date'],
                '%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
            classification='registration'
        )

        # add participants
        _event.add_participant(type=_registrant._type,
                               id=_registrant._id,
                               name=_registrant.name,
                               note="registrant")

        if _registrant._type == 'person':
            _event.add_participant(type=_registrant._type,
                                   id=_registrant._id,
                                   name=_registrant.name,
                                   note="registrant")

        _event.add_participant(type=_client._type,
                               id=_client._id,
                               name=_client.name,
                               note="client")

        for l in _lobbyists:
            _event.add_participant(type=l._type,
                                   id=l._id,
                                   name=l.name,
                                   note='lobbyist')

        for fe in _foreign_entities:
            _event.add_participant(type=fe._type,
                                   id=fe._id,
                                   name=fe.name,
                                   note='foreign_entity')

        for ao in _affiliated_organizations:
            _event.add_participant(type=ao._type,
                                   id=ao._id,
                                   name=ao.name,
                                   note='affiliated_organization')

        # add agenda item
        _agenda = _event.add_agenda_item(
            description='issues lobbied on',
        )

        _agenda['notes'].append(
            parsed_form['lobbying_issues_detail']
        )

        for li in parsed_form['lobbying_issues']:
            if li['general_issue_area'] != '':
                _agenda.add_subject(li['general_issue_area'])

        _disclosure.add_disclosed_event(
            name=_event.name,
            type=_event._type,
            classification=_event.classification,
            id=_event._id
        )

        # add registrant to disclosure's _related and related_entities fields
        _disclosure.add_registrant(name=_registrant.name,
                                   type=_registrant._type,
                                   id=_registrant._id)

        _registrant.add_source(
            url=_source['url'],
            note='registrant'
        )
        yield _registrant

        if _registrant_self_employment is not None:
            _registrant_self_employment.add_source(
                url=_source['url'],
                note='registrant_self_employment'
            )

            yield _registrant_self_employment

        _client.add_source(
            url=_source['url'],
            note='client'
        )
        yield _client

        _main_contact.add_source(
            url=_source['url'],
            note='main_contact'
        )
        yield _main_contact

        for ao in _affiliated_organizations:
            ao.add_source(
                url=_source['url'],
                note='affiliated_organization'
            )
            yield ao
        for fe in _foreign_entities:
            fe.add_source(
                url=_source['url'],
                note='foreign_entity'
            )
            yield fe
        for l in _lobbyists:
            l.add_source(
                url=_source['url'],
                note='lobbyist'
            )
            yield l

        _event.add_source(**_source)
        yield _event
        _disclosure.add_source(**_source)
        yield _disclosure

示例#12

显示文件

文件： legislative.py 项目： influence-usa/scrapers-us-federal

    def scrape_current_legislators(self, repos):
        """Yield Post, Membership and Person objects for each legislator.

        Every entry of ``repos`` is resolved with ``self.get_url`` and the
        result is parsed with ``self.yamlize`` into a list of person records.
        For each record this generator yields, in order:

        * for every term: a ``Post`` the first time a (chamber, division)
          pair is seen, the ``Membership`` tying the person to that post,
          and a party ``Membership`` when the term names a party;
        * finally the ``Person`` itself, but only if the record had at
          least one term.

        Args:
            repos: iterable of values accepted by ``self.get_url``.

        Raises:
            KeyError: if a term's ``type`` is neither ``"rep"`` nor ``"sen"``,
                or a record lacks one of the required keys read below.
        """
        for repo in repos:
            source_url = self.get_url(repo)
            people = self.yamlize(source_url)

            # Posts already yielded for this repo, keyed by
            # (term type, division id).  The term type is part of the key
            # because an at-large House seat (district 0) and the Senate
            # seats of the same state share one division id; keying on the
            # division alone would hand one chamber the other's post.
            posts = {}

            for person in people:
                name = person['name'].get('official_full')
                if name is None:
                    name = "{name[first]} {name[last]}".format(**person)

                # Only pass a birth date when this record actually has one,
                # so a value from a previously processed person can never
                # leak into this one.
                person_kwargs = {}
                bio = person.get('bio') or {}
                if 'birthday' in bio:
                    person_kwargs['birth_date'] = bio['birthday']

                # Every record gets its own Person; no de-duplication across
                # records is attempted.
                who = Person(name=name, **person_kwargs)
                who.add_source(url=source_url,
                               note="unitedstates project on GitHub")
                has_term = False

                for term in person.get('terms', []):
                    has_term = True
                    start_date = term['start']
                    end_date = term['end']
                    state = term['state']
                    type_ = term['type']
                    district = term.get('district', None)
                    party = term.get('party', None)

                    # Doubles as validation: unknown term types fail here.
                    role = {
                        'rep': 'Representative',
                        'sen': 'Senator',
                    }[type_]

                    # Work out which seat (if any) this term refers to.
                    division_id = None
                    label = None
                    if type_ == "rep" and district is not None:
                        label = "%s for District %s in %s" % (role, district,
                                                              state)
                        if district == 0:
                            # At-large seat: the division is the whole state.
                            division_id = (
                                "ocd-division/country:us/state:{state}".format(
                                    state=state.lower()))
                        else:
                            division_id = (
                                "ocd-division/country:us/"
                                "state:{state}/cd:{district}".format(
                                    state=state.lower(), district=district))
                    elif type_ == "sen":
                        division_id = (
                            "ocd-division/country:us/state:{state}".format(
                                state=state.lower()))
                        # NOTE(review): "Senitor" is a misspelling of
                        # "Senator".  It is kept byte-for-byte because the
                        # label is emitted data; confirm nothing downstream
                        # matches on it before correcting the spelling.
                        label = "Senitor for %s" % (state)

                    if division_id is not None:
                        organization_id = {
                            "rep": self.house,
                            "sen": self.senate,
                        }[type_]._id

                        post_key = (type_, division_id)
                        post = posts.get(post_key)
                        if post is None:
                            post = Post(organization_id=organization_id,
                                        division_id=division_id,
                                        label=label,
                                        role=role)
                            posts[post_key] = post
                            yield post

                        yield Membership(post_id=post._id,
                                         role=role,
                                         label=label,
                                         start_date=start_date,
                                         end_date=end_date,
                                         person_id=who._id,
                                         organization_id=organization_id)

                    # The source data says "Democrat"; the party organization
                    # is looked up under the name "Democratic".
                    if party == "Democrat":
                        party = "Democratic"

                    if party:
                        yield Membership(role='member',
                                         start_date=start_date,
                                         end_date=end_date,
                                         person_id=who._id,
                                         organization_id=make_pseudo_id(
                                             classification="party",
                                             name=party))

                # Each id scheme may carry a single value or a list of values.
                for key, value in person.get('id', {}).items():
                    values = value if isinstance(value, list) else [value]
                    for v in values:
                        who.add_identifier(str(v), scheme=key)

                # People without any term are not emitted at all.
                if has_term:
                    yield who