Пример #1
0
def scrape_person(context, doc, url):
    """Build an ``eu-whoiswho`` entity from a parsed person page and emit it.

    ``doc`` is the parsed page to read from and ``url`` is stored on the
    entity. The entity ID is derived from the person's name and job title.
    """
    breadcrumb = doc.find(
        './/span[@itemtype="http://data-vocabulary.org/Breadcrumb"]')
    # Collect the text of every breadcrumb child, skipping blank ones.
    labels = []
    for child in breadcrumb:
        text = child.text_content()
        if text and text.strip():
            labels.append(text)
    # Drop the first item ('institution') and the last one ('name of person').
    labels = labels[1:-1]

    heading = doc.find('.//h3[@itemprop="name"]')
    name = heading.text_content()
    title = doc.findtext('.//td[@itemprop="jobTitle"]')

    entity = Entity.create('eu-whoiswho', make_id(name, title))
    entity.name = name
    entity.url = url
    entity.function = title

    address = entity.create_address()
    address.street = doc.findtext('.//span[@itemprop="streetAddress"]')
    address.postal_code = doc.findtext('.//span[@itemprop="postalCode"]')
    address.text = doc.findtext('.//span[@itemprop="addressLocality"]')

    # The second remaining breadcrumb level is recorded as the program.
    if len(labels) > 1:
        entity.program = labels[1]

    context.emit(data=entity.to_dict())
Пример #2
0
def scrape_entity(context, data):
    """Turn one ``everypolitician`` row into an individual entity and emit it.

    ``data`` carries the ``row``, its ``legislature`` and its ``country``.
    A row without an ID only triggers a warning; the entity is still built.
    """
    row = data.get("row")
    legislature = data.get("legislature")
    country = data.get("country")

    person_id = row.get('id')
    if person_id is None:
        context.log.warning("No ID for entry: %r", row)

    entity = Entity.create('everypolitician', person_id)
    entity.type = entity.TYPE_INDIVIDUAL
    entity.updated_at = parse_ts(legislature.get('lastmod'))
    entity.name = row.get('name')
    entity.function = row.get('group')
    entity.program = legislature.get('name')
    entity.gender = GENDERS[row.get('gender')]

    nationality = entity.create_nationality()
    nationality.country = country.get('name')
    nationality.country_code = country.get('code')

    # The sort name is kept as an alias only when it differs from the name.
    sort_name = row.get('sort_name')
    if row.get('name') != sort_name:
        alias = entity.create_alias()
        alias.name = sort_name

    # TODO: email
    # TODO: socialmedia
    # TODO: photograph
    context.emit(data=entity.to_dict())
Пример #3
0
def parse_entry(context, entry):
    """Convert one ``eu-eeas-sanctions`` XML entry into an entity and emit it.

    The first NAME element fills the entity itself; each later NAME element
    becomes an alias. Passports, addresses, births and citizenships are
    attached as sub-records.
    """
    entity_type = ENTITY_TYPES[entry.get('Type')]
    entity = Entity.create('eu-eeas-sanctions', entry.get('Id'))
    entity.type = entity_type
    entity.updated_at = entry.get('reg_date')
    entity.url = entry.get('pdf_link')
    entity.program = entry.get('programme')
    entity.summary = entry.get('remark')

    for name_node in entry.findall('./NAME'):
        # Until the entity has a name, write onto the entity; afterwards
        # every further NAME goes to a fresh alias.
        target = entity if entity.name is None else entity.create_alias()

        target.title = name_node.findtext('./TITLE')
        target.name = name_node.findtext('./WHOLENAME')
        target.first_name = name_node.findtext('./FIRSTNAME')
        target.second_name = name_node.findtext('./MIDDLENAME')
        target.last_name = name_node.findtext('./LASTNAME')

        if entity.function is None:
            entity.function = name_node.findtext('./FUNCTION')

        if entity.gender is None:
            entity.gender = GENDERS[name_node.findtext('./GENDER')]

    for passport_node in entry.findall('./PASSPORT'):
        identifier = entity.create_identifier()
        identifier.type = Identifier.TYPE_PASSPORT
        identifier.number = passport_node.findtext('./NUMBER')
        identifier.country = passport_node.findtext('./COUNTRY')

    for address_node in entry.findall('./ADDRESS'):
        address = entity.create_address()
        address.street = address_node.findtext('./STREET')
        address.street_2 = address_node.findtext('./NUMBER')
        address.city = address_node.findtext('./CITY')
        address.postal_code = address_node.findtext('./ZIPCODE')
        address.country = address_node.findtext('./COUNTRY')

    for birth_node in entry.findall('./BIRTH'):
        place = stringify(birth_node.findtext('./PLACE'))
        birth_country = stringify(birth_node.findtext('./COUNTRY'))
        # A birth place record needs at least a place or a country.
        if not (place is None and birth_country is None):
            birth_place = entity.create_birth_place()
            birth_place.place = place
            birth_place.country = birth_country

        born_on = stringify(parse_date(birth_node.findtext('./DATE')))
        if born_on is not None:
            birth_date = entity.create_birth_date()
            birth_date.date = born_on

    for citizen_country in entry.findall('./CITIZEN/COUNTRY'):
        nationality = entity.create_nationality()
        nationality.country = citizen_country.text

    context.emit(data=entity.to_dict())
Пример #4
0
def handle_organisation(context, data):
    """Build a ``kg-fiu-national`` organisation entity from a row and emit it.

    ``data`` is a sequence of cell values in the column order listed below.
    A comma-separated name cell yields a primary name plus one alias per
    additional part.
    """
    columns = ("No",
               "Name",
               "Reason for inclusion",
               "Category of entity",
               "Date of inclusion")
    record = dict(zip(columns, data))

    entity = Entity.create(
        "kg-fiu-national",
        make_id(record["Name"], record["Reason for inclusion"]))
    entity.type = entity.TYPE_ENTITY

    raw_name = record["Name"]
    names = raw_name.split(",") if "," in raw_name else [raw_name]
    entity.name = names[0]
    for extra_name in names[1:]:
        entity.create_alias(extra_name)

    entity.program = record["Category of entity"]
    entity.summary = record["Reason for inclusion"]
    entity.listed_at = record["Date of inclusion"]

    context.emit(data=entity.to_dict())
Пример #5
0
def handle_individual(context, data):
    """Build a ``kg-fiu-national`` individual entity from a row and emit it.

    ``data`` is a sequence of cell values in the column order listed in
    ``header``. The entity ID is derived from the three name parts plus the
    reason for inclusion.
    """
    header = ["No", "Last Name", "Name", "Middle Name", "Date of birth",
              "Place of birth", "Reason for inclusion",
              "Category of entity", "Date of inclusion"]
    data = dict(zip(header, data))

    entity_id = make_id(data["Last Name"],
                        data["Middle Name"],
                        data["Name"],
                        data["Reason for inclusion"])
    entity = Entity.create("kg-fiu-national", entity_id)
    entity.type = entity.TYPE_INDIVIDUAL
    entity.last_name = data["Last Name"]
    entity.first_name = data["Name"]
    entity.second_name = data["Middle Name"]

    birth_date = entity.create_birth_date()
    birth_date.date = data["Date of birth"]

    # The place of birth belongs on a birth-place record. Previously a second
    # birth-date record was created here and the place was set on that.
    birth_place = entity.create_birth_place()
    birth_place.place = data["Place of birth"]

    entity.program = data["Category of entity"]
    entity.summary = data["Reason for inclusion"]
    entity.listed_at = data["Date of inclusion"]

    context.emit(data=entity.to_dict())
Пример #6
0
def parse_entry(context, entry):
    """Convert one ``ua-sdfm-blacklist`` XML entry into an entity and emit it.

    An aka item of type ``'N'`` fills the name parts of the entity itself;
    any other aka item becomes an alias. Documents, ID numbers, addresses,
    births and nationalities are attached as sub-records.
    """
    entity = Entity.create('ua-sdfm-blacklist', entry.findtext('number-entry'))
    entity.type = ENTITY_TYPES[entry.findtext('./type-entry')]
    entity.program = entry.findtext('./program-entry')
    entity.summary = entry.findtext('./comments')
    entity.url = 'http://www.sdfm.gov.ua/articles.php?cat_id=87&lang=en'

    raw_date = entry.findtext('./date-entry')
    if raw_date:
        # The entry date is written as YYYYMMDD; store it as an ISO date.
        listed = datetime.strptime(raw_date, '%Y%m%d')
        entity.updated_at = listed.date().isoformat()

    for aka in entry.findall('./aka-list'):
        is_primary = aka.findtext('type-aka') == 'N'
        if is_primary:
            target = entity
        else:
            target = entity.create_alias()
            target.type = aka.findtext('./category-aka')
            target.description = aka.findtext('./type-aka')
            target.quality = ALIAS_QUALITY[aka.findtext('./quality-aka')]
        target.first_name = aka.findtext('./aka-name1')
        target.second_name = aka.findtext('./aka-name2')
        target.third_name = aka.findtext('./aka-name3')
        target.last_name = aka.findtext('./aka-name4')

    # When several title elements exist, the last one wins.
    for title_node in entry.findall('./title-list'):
        entity.title = title_node.text

    for document in entry.findall('./document-list'):
        passport = entity.create_identifier()
        passport.type = Identifier.TYPE_PASSPORT
        passport.description = document.findtext('./document-reg')
        passport.number = document.findtext('./document-id')
        passport.country = document.findtext('./document-country')

    for id_node in entry.findall('./id-number-list'):
        national_id = entity.create_identifier()
        national_id.type = Identifier.TYPE_NATIONALID
        national_id.description = id_node.text

    for address_node in entry.findall('./address-list'):
        address = entity.create_address()
        address.text = address_node.findtext('./address')

    for place_node in entry.findall('./place-of-birth-list'):
        birth_place = entity.create_birth_place()
        birth_place.place = place_node.text

    for date_node in entry.findall('./date-of-birth-list'):
        birth_date = entity.create_birth_date()
        birth_date.date = parse_date(context, date_node.text)

    for nationality_node in entry.findall('./nationality-list'):
        nationality = entity.create_nationality()
        nationality.country = nationality_node.text

    context.emit(data=entity.to_dict())
Пример #7
0
def parse_entry(context, entry, url, updated_at):
    """Convert one ``us-ofac`` XML entry into an entity and emit it.

    Entries whose type maps to ``None`` in ``ENTITY_TYPES`` are skipped.
    The entity ID combines the source ``url`` with the entry's ``uid``.
    """
    uid = entry.findtext('uid')
    entity_type = ENTITY_TYPES[entry.findtext('./sdnType')]
    if entity_type is None:
        return

    entity = Entity.create('us-ofac', make_id(url, uid))
    entity.type = entity_type
    entity.updated_at = updated_at
    entity.program = '; '.join(
        program.text for program in entry.findall('./programList/program'))
    entity.summary = entry.findtext('./remarks')
    entity.function = entry.findtext('./title')
    entity.first_name = entry.findtext('./firstName')
    entity.last_name = entry.findtext('./lastName')

    for aka in entry.findall('./akaList/aka'):
        alias = entity.create_alias()
        alias.first_name = aka.findtext('./firstName')
        alias.last_name = aka.findtext('./lastName')
        alias.type = aka.findtext('./type')
        alias.quality = ALIAS_QUALITY[aka.findtext('./category')]

    for ident in entry.findall('./idList/id'):
        raw_type = ident.findtext('./idType')
        # Unknown ID types fall back to TYPE_OTHER; types explicitly mapped
        # to None are dropped.
        id_type = ID_TYPES.get(raw_type, Identifier.TYPE_OTHER)
        if id_type is None:
            continue
        identifier = entity.create_identifier()
        identifier.type = id_type
        identifier.number = ident.findtext('./idNumber')
        identifier.country = ident.findtext('./idCountry')
        identifier.description = raw_type

    for addr in entry.findall('./addressList/address'):
        address = entity.create_address()
        address.street = addr.findtext('./address1')
        address.street_2 = addr.findtext('./address2')
        address.city = addr.findtext('./city')
        address.country = addr.findtext('./country')

    for place_item in entry.findall('./placeOfBirthList/placeOfBirthItem'):
        birth_place = entity.create_birth_place()
        birth_place.place = place_item.findtext('./placeOfBirth')
        # Only the item flagged as main entry is rated strong.
        if place_item.findtext('./mainEntry') == 'true':
            birth_place.quality = BirthPlace.QUALITY_STRONG
        else:
            birth_place.quality = BirthPlace.QUALITY_WEAK

    for date_item in entry.findall('./dateOfBirthList/dateOfBirthItem'):
        birth_date = entity.create_birth_date()
        birth_date.date = stringify(
            parse_date(date_item.findtext('./dateOfBirth')))
        if date_item.findtext('./mainEntry') == 'true':
            birth_date.quality = BirthDate.QUALITY_STRONG
        else:
            birth_date.quality = BirthDate.QUALITY_WEAK

    context.emit(data=entity.to_dict())
Пример #8
0
def parse_common(node):
    """Create an ``un-sc-sanctions`` entity with the fields read from ``node``.

    Returns the new entity; nothing is emitted here.
    """
    entity = Entity.create('un-sc-sanctions', node.findtext('./DATAID'))

    list_type = node.findtext('./UN_LIST_TYPE').strip()
    reference = node.findtext('./REFERENCE_NUMBER').strip()
    entity.program = '%s (%s)' % (list_type, reference)

    # Remaining attributes are copied straight from the matching element.
    simple_fields = (
        ('summary', './COMMENTS1'),
        ('function', './DESIGNATION/VALUE'),
        ('listed_at', './LISTED_ON'),
        ('updated_at', './LAST_DAY_UPDATED/VALUE'),
        ('name', './NAME_ORIGINAL_SCRIPT'),
        ('first_name', './FIRST_NAME'),
        ('second_name', './SECOND_NAME'),
        ('third_name', './THIRD_NAME'),
    )
    for attribute, path in simple_fields:
        setattr(entity, attribute, node.findtext(path))
    return entity
Пример #9
0
def parse_entry(context, node):
    """Convert one ``eu-meps`` XML node into an individual entity and emit it.

    The summary combines the political group with the national political
    group; a missing value is rendered as an empty string.
    """
    entity = Entity.create('eu-meps', node.findtext('.//id'))
    entity.type = Entity.TYPE_INDIVIDUAL
    entity.name = node.findtext('.//fullName')
    entity.first_name, entity.last_name = split_name(entity.name)

    national_group = node.findtext('.//nationalPoliticalGroup') or ''
    political_group = node.findtext('.//politicalGroup') or ''
    entity.summary = '%s (%s)' % (political_group, national_group)

    nationality = entity.create_nationality()
    nationality.country = node.findtext('.//country')

    context.emit(data=entity.to_dict())
Пример #10
0
def scrape_case(context, data):
    """Fetch one ``interpol-red-notices`` case page and emit it as an entity.

    ``data['url']`` is the page to fetch. Pages without a name, or named
    'Identity unknown', are skipped. A name of the form "Last, First" also
    yields a "First Last" alias.
    """
    url = data.get('url')
    res = context.http.get(url)
    doc = res.html
    name = element_text(doc.find('.//div[@class="nom_fugitif_wanted"]'))
    if name is None or name == 'Identity unknown':
        return
    uid = make_id(url)
    entity = Entity.create('interpol-red-notices', uid)
    entity.url = url
    entity.type = entity.TYPE_INDIVIDUAL
    entity.name = name
    entity.program = element_text(doc.find('.//span[@class="nom_fugitif_wanted_small"]'))  # noqa

    if ', ' in name:
        last, first = name.split(', ', 1)
        alias = entity.create_alias()
        alias.name = ' '.join((first, last))

    for row in doc.findall('.//div[@class="bloc_detail"]//tr'):
        title, value = row.findall('./td')
        # Use a separate variable for the row label so the person's name
        # above is not overwritten.
        field = slugify(element_text(title), sep='_')
        value = element_text(value)
        if value is None:
            continue
        if field == 'charges':
            entity.summary = value
        elif field == 'present_family_name':
            entity.last_name = value
        elif field == 'forename':
            entity.first_name = value
        elif field == 'nationality':
            for country in value.split(', '):
                nationality = entity.create_nationality()
                nationality.country = country
        elif field == 'sex':
            entity.gender = SEXES[value]
        elif field == 'date_of_birth':
            birth_date = entity.create_birth_date()
            # Keep only the text before any parenthesised suffix.
            birth_date.date = value.split('(')[0]
        elif field == 'place_of_birth':
            birth_place = entity.create_birth_place()
            # Previously the value was written to ``birth_place.date``, so
            # the place itself was never stored.
            birth_place.place = value

    context.emit(data=entity.to_dict())
Пример #11
0
def parse_entry(context, entry):
    """Convert one ``coe_assembly`` listing entry into an entity and emit it.

    The member ID is the part of the member URL after its last ``=``. The
    link text is expected in "Last, First" form.
    """
    link = entry.find('.//a')
    base_url = context.params.get('url')
    url = urljoin(base_url, link.get('href'))
    _, member_id = url.rsplit('=', 1)

    entity = Entity.create('coe_assembly', member_id)
    entity.type = Entity.TYPE_INDIVIDUAL
    entity.url = url
    entity.last_name, entity.first_name = link.text.split(', ', 1)
    entity.function = entry.findtext('.//span[@class="fonction"]')

    # Exactly two "infos" spans are expected: the role, then the country.
    role_span, country_span = entry.findall('.//span[@class="infos"]')
    entity.summary = role_span.text_content().strip()
    nationality = entity.create_nationality()
    nationality.country = country_span.text_content().strip()

    context.emit(data=entity.to_dict())
Пример #12
0
def parse_row(context, data):
    """Convert one ``us-bis-denied`` row into an entity and emit it.

    The entity ID is derived from the row's effective date and name.
    """
    row = data.get('row')
    effective_date = row.get('Effective_Date')
    name = row.get('Name')

    entity = Entity.create('us-bis-denied', make_id(effective_date, name))
    entity.type = Entity.TYPE_ENTITY
    entity.name = name
    entity.updated_at = effective_date
    entity.program = row.get('FR_Citation')
    entity.summary = row.get('Action')

    address = entity.create_address()
    # Address attributes are copied one-to-one from the row columns.
    for attribute, column in (('street', 'Street_Address'),
                              ('postal_code', 'Postal_Code'),
                              ('region', 'State'),
                              ('city', 'City'),
                              ('country', 'Country')):
        setattr(address, attribute, row.get(column))

    context.emit(data=entity.to_dict())
Пример #13
0
def parse_entry(context, entry):
    """Fetch one ``worldpresidentsdb`` profile and emit it as an entity.

    ``entry`` supplies the ``href`` of the profile. Each paragraph that
    starts with a bold label contributes one field; unknown labels are
    ignored.
    """
    url = entry.get('href')
    res = context.http.get('https://www.worldpresidentsdb.com/' + url)
    content = res.html.find('.//main/div')

    entity = Entity.create('worldpresidentsdb', make_id(url))
    entity.type = Entity.TYPE_INDIVIDUAL
    entity.function = 'President'
    entity.url = url
    entity.first_name, entity.last_name = content.find('h1').text.split(' ', 1)

    def trailing_text(paragraph):
        # The value is the text that follows the paragraph's first child.
        return paragraph[0].tail.strip()

    for paragraph in content.findall('.//p'):
        bold = paragraph.find('.//b')
        if bold is None:
            continue
        label = bold.text

        if label == 'Country:':
            nationality = entity.create_nationality()
            nationality.country = paragraph.find('a').text
        elif label == 'Date of Birth:':
            # The page gives month-day-year; store it as year-month-day.
            month, day, year = trailing_text(paragraph).split('-', 2)
            birth_date = entity.create_birth_date()
            birth_date.date = '-'.join((year, month, day))
            birth_date.quality = 'strong'
        elif label == 'Birth Place:':
            place = trailing_text(paragraph)
            birth_place = entity.create_birth_place()
            birth_place.place = place
        elif label == 'Political Party:':
            entity.program = trailing_text(paragraph)
        elif label == 'Other Political Titles:':
            entity.summary = trailing_text(paragraph)

    context.emit(data=entity.to_dict())
Пример #14
0
def parse_entity(context, url, country, component, row, updated_at):
    """Convert one ``us-cia-world-leaders`` row into an entity and emit it.

    Rows lacking either a title or a name are skipped. ``component`` is
    accepted but not used here.
    """
    function = element_text(row.find('.//span[@class="title"]'))
    if function is None:
        return
    name = element_text(row.find('.//span[@class="cos_name"]'))
    if name is None:
        return

    entity = Entity.create('us-cia-world-leaders',
                           make_id(country, name, function))
    entity.name = name
    entity.type = entity.TYPE_INDIVIDUAL
    entity.function = function
    # The country serves both as the program and as the nationality.
    entity.program = country
    entity.url = url
    entity.updated_at = updated_at

    nationality = entity.create_nationality()
    nationality.country = country

    context.emit(data=entity.to_dict())
Пример #15
0
def parse(context, data):
    """Emit one ``zz-wb-debarred`` entity per six-cell row of the listing.

    Only tables whose ``summary`` attribute mentions 'List of Debarred' are
    read. The entity ID is the first ten hex digits of a SHA-1 over all
    cell values of the row.
    """
    url = context.params.get('url')
    res = context.http.rehash(data)
    for table in res.html.findall('.//table'):
        if 'List of Debarred' not in table.get('summary', ''):
            continue
        for row in table.findall('.//tr'):
            cells = row.findall('./td')
            if len(cells) != 6:
                continue
            values = [clean_value(cell) for cell in cells]

            digest = sha1()
            for value in values:
                digest.update(value.encode('utf-8'))
            uid = digest.hexdigest()[:10]

            names = clean_name(values[0])
            if len(names) == 0:
                context.log.warning("No name: %r", values)
                continue

            entity = Entity.create('zz-wb-debarred', uid)
            entity.program = values[5]
            entity.name = names[0]
            entity.updated_at = dateutil_parse(values[3]).date().isoformat()
            entity.url = url
            # Every name after the first is recorded as an alias.
            for alias_name in names[1:]:
                entity.create_alias(name=alias_name)

            nationality = entity.create_nationality()
            nationality.country = values[2]

            address = entity.create_address()
            address.text = values[1]
            address.country = values[2]

            context.emit(data=entity.to_dict())
Пример #16
0
def parse_entry(context, data):
    """Convert a group of ``au-dfat-sanctions`` rows into an entity and emit it.

    The first row describes the entity; every further row contributes an
    alias. Citizenship and birth values that are floats are skipped
    (presumably NaN placeholders for empty cells).
    """
    rows = data.get('rows')
    primary = rows[0]
    is_individual = slugify(primary.get('type', '')) == 'individual'
    type_ = Entity.TYPE_INDIVIDUAL if is_individual else Entity.TYPE_ENTITY

    entity = Entity.create('au-dfat-sanctions', primary.get('reference'))
    entity.type = type_
    entity.url = 'http://dfat.gov.au/international-relations/security/sanctions/Pages/sanctions.aspx'  # noqa
    entity.name = primary.get('name_of_individual_or_entity', '')
    entity.program = primary.get('committees', '')
    entity.summary = primary.get('additional_information', '')

    citizenship = primary.get('citizenship', '')
    if not isinstance(citizenship, float):  # not NaN
        nationality = entity.create_nationality()
        nationality.country = citizenship

    address = entity.create_address()
    address.text = primary.get('address', '')

    born_on = primary.get('date_of_birth', '')
    if not isinstance(born_on, float):
        birth_date = entity.create_birth_date()
        birth_date.date = born_on

    born_in = primary.get('place_of_birth', '')
    if not isinstance(born_in, float):
        birth_place = entity.create_birth_place()
        birth_place.place = born_in

    for extra_row in rows[1:]:
        alias = entity.create_alias()
        alias.name = extra_row.get('name_of_individual_or_entity', '')

    context.emit(data=entity.to_dict())
Пример #17
0
def parse_entry(context, data):
    """Convert a group of ``au-dfat-sanctions`` rows into an entity and emit it.

    The first row describes the entity; every further row contributes an
    alias. Columns are read by their original header names. Citizenship and
    birth values that are floats are skipped (presumably NaN placeholders
    for empty cells).
    """
    rows = data.get('rows')
    primary = rows[0]
    is_individual = primary['Type'] == 'Individual'
    type_ = Entity.TYPE_INDIVIDUAL if is_individual else Entity.TYPE_ENTITY

    entity = Entity.create('au-dfat-sanctions', primary.get('Reference'))
    entity.type = type_
    entity.url = 'http://dfat.gov.au/international-relations/security/sanctions/Pages/sanctions.aspx'  # noqa
    entity.name = primary['Name of Individual or Entity']
    entity.program = primary['Committees']
    entity.summary = primary['Additional Information']

    citizenship = primary['Citizenship']
    if not isinstance(citizenship, float):  # not NaN
        nationality = entity.create_nationality()
        nationality.country = citizenship

    address = entity.create_address()
    address.text = primary['Address']

    born_on = primary['Date of Birth']
    if not isinstance(born_on, float):
        birth_date = entity.create_birth_date()
        birth_date.date = born_on

    born_in = primary['Place of Birth']
    if not isinstance(born_in, float):
        birth_place = entity.create_birth_place()
        birth_place.place = born_in

    for extra_row in rows[1:]:
        alias = entity.create_alias()
        alias.name = extra_row['Name of Individual or Entity']

    context.emit(data=entity.to_dict())
Пример #18
0
def parse_entry(context, target, updated_at, sanctions, places):
    """Convert one ``ch-seco-sanctions`` target into an entity and emit it.

    A target holds either an ``individual`` or an ``entity`` child; targets
    with neither are skipped. ``sanctions`` maps a sanctions-set ID to the
    program value, and ``places`` is passed through to ``parse_identity``.
    """
    candidates = (
        ('./individual', Entity.TYPE_INDIVIDUAL),
        ('./entity', Entity.TYPE_ENTITY),
    )
    for path, type_ in candidates:
        node = target.find(path)
        if node is not None:
            break
    else:
        # node = target.find('./object')
        # TODO: build out support for these!
        return

    entity = Entity.create('ch-seco-sanctions', target.get('ssid'))
    entity.type = type_
    entity.updated_at = updated_at
    entity.program = sanctions.get(target.get('sanctions-set-id'))
    entity.function = node.findtext('./other-information')
    entity.summary = node.findtext('./justification')

    for identity_node in node.findall('./identity'):
        parse_identity(entity, identity_node, places)

    context.emit(data=entity.to_dict())
Пример #19
0
def crawl_officer(context, data):
    """Fetch a ``gb-coh-disqualified`` officer record and emit it as an entity.

    The officer is looked up under both the 'natural' and the 'corporate'
    API path; one entity is emitted for each lookup that answers with
    status 200.
    """
    officer_id = data.get('officer_id')
    for type_ in ('natural', 'corporate'):
        res = context.http.get(API_URL % (type_, officer_id), auth=AUTH)
        if res.status_code != 200:
            continue
        # TODO: check if this existed
        entity = Entity.create('gb-coh-disqualified', officer_id)
        record = res.json
        entity.title = record.get('title')
        entity.first_name = record.get('forename')
        entity.second_name = record.get('other_forenames')
        entity.last_name = record.get('surname')
        entity.summary = record.get('kind')
        self_link = record.get('links', {}).get('self', '/')
        entity.url = urljoin(WEB_URL, self_link)

        date_of_birth = record.get('date_of_birth')
        if date_of_birth:
            birth_date = entity.create_birth_date()
            birth_date.date = date_of_birth

        country = record.get('nationality')
        if country:
            nationality = entity.create_nationality()
            nationality.country = country

        # Each disqualification adds an address; the program ends up holding
        # the case identifier of the last disqualification.
        for disqualification in record.get('disqualifications', []):
            entity.program = disqualification.get('case_identifier')
            addr = disqualification.get('address')
            address = entity.create_address()
            address.street = addr.get('address_line_1')
            address.street_2 = addr.get('address_line_2')
            address.city = addr.get('locality')
            address.region = addr.get('region')
            address.postal_code = addr.get('postal_code')

        context.emit(data=entity.to_dict())
Пример #20
0
def parse_entry(context, data):
    """Merge all rows of one ``gb-hmt-sanctions`` group into one entity.

    ``data['group']`` is used as the entity ID and ``data['rows']`` holds the
    rows of that group. Columns are consumed with ``row.pop`` as they are
    processed, so the order of the statements below matters. ``seen`` is
    handed to ``fresh_value`` (defined elsewhere) -- presumably that helper
    returns True only the first time a value shows up for a given column;
    confirm against its definition.
    """
    group = data.get('group')
    rows = data.get('rows')
    seen = defaultdict(set)
    entity = Entity.create('gb-hmt-sanctions', group)
    for row in rows:
        entity.type = ENTITY_TYPES[row.pop('Group Type')]

        # Join the non-empty name parts into a synthetic '_name' column.
        names = (row.pop('Name 1'), row.pop('Name 2'), row.pop('Name 3'),
                 row.pop('Name 4'), row.pop('Name 5'), row.pop('Name 6'))
        names = [n for n in names if n is not None]
        row['_name'] = ' '.join(names)

        if fresh_value(seen, row, '_name'):
            # The first name goes onto the entity; once the entity has a
            # name, further names become aliases.
            name = entity
            if entity.name is not None:
                name = entity.create_alias()
                name.type = row.get('Alias Type')
            name.title = row.get('Title')
            # The last non-empty part is the last name; the remaining parts
            # fill first, second and third name in order.
            name.last_name = names.pop()
            if len(names):
                name.first_name = names.pop(0)
            if len(names):
                name.second_name = names.pop(0)
            if len(names):
                name.third_name = ' '.join(names)

        # Later rows overwrite these entity-level values when present.
        if row.get('Regime'):
            entity.program = row.pop('Regime')
        if row.get('Position'):
            entity.function = row.pop('Position')
        if row.get('Other Information'):
            entity.summary = row.pop('Other Information')
        if row.get('Last Updated'):
            entity.updated_at = row.pop('Last Updated')

        if fresh_value(seen, row, 'DOB'):
            dob_text = row.get('DOB')
            if dob_text is None or not len(dob_text.strip()):
                # NOTE(review): this `continue` skips the rest of the row
                # (birth place, address, identifiers, nationality), not just
                # the date of birth -- confirm that is intended.
                continue
            dob = parse_date(dob_text)
            # Fall back to the text after the last '/' when parsing fails.
            if dob is None and '/' in dob_text:
                _, dob = dob_text.rsplit('/', 1)
            birth_date = entity.create_birth_date()
            birth_date.date = stringify(dob)

        if fresh_value(seen, row, 'Town of Birth') or \
           fresh_value(seen, row, 'Country of Birth'):
            birth_place = entity.create_birth_place()
            birth_place.place = row.pop('Town of Birth')
            birth_place.country = row.pop('Country of Birth')

        addr = [
            row.pop('Address 1'),
            row.pop('Address 2'),
            row.pop('Address 3'),
            row.pop('Address 4'),
            row.pop('Address 5'),
            row.pop('Address 6')
        ]
        # NOTE(review): 'Post/Zip Code' is appended twice here; the second
        # item may have been meant to be another column such as 'Country'
        # -- confirm.
        addr_ids = addr + [row.get('Post/Zip Code'), row.get('Post/Zip Code')]
        row['_addr'] = ' '.join([a for a in addr_ids if a is not None])
        if fresh_value(seen, row, '_addr'):
            address = entity.create_address()
            address.country = row.pop('Country')
            address.postal_code = row.pop('Post/Zip Code')
            address.text = ', '.join([a for a in addr if a is not None])

        if fresh_value(seen, row, 'Passport Details'):
            identifier = entity.create_identifier()
            identifier.type = Identifier.TYPE_PASSPORT
            identifier.number = row.pop('Passport Details')
            # Read with get, not pop: 'Nationality' is still needed below.
            identifier.country = row.get('Nationality')

        if fresh_value(seen, row, 'NI Number'):
            identifier = entity.create_identifier()
            identifier.type = Identifier.TYPE_NATIONALID
            identifier.number = row.pop('NI Number')
            identifier.country = row.get('Nationality')

        if fresh_value(seen, row, 'Nationality'):
            has_match = False
            text = row.pop('Nationality')
            # NOTE(review): str.split always yields at least one string, so
            # `code` is never None, `has_match` is always set and the
            # fallback below cannot run for string input. The pieces are
            # stored as split, without stripping -- confirm the intent.
            for name in text.split(')'):
                code = name
                if code is not None:
                    nationality = entity.create_nationality()
                    nationality.country = name
                    has_match = True
            if not has_match:
                nationality = entity.create_nationality()
                nationality.country = text
    # pprint(entity.to_dict())
    context.emit(data=entity.to_dict())