Пример #1
0
def parse_individual(emitter, node):
    """Build and emit a Person (plus passports and sanction) from ``node``.

    ``node`` is an XML element describing one individual.  The sanction is
    produced by ``parse_common``; aliases and addresses are delegated to
    ``parse_alias`` and ``parse_address``.  One Passport entity is emitted
    per ``INDIVIDUAL_DOCUMENT`` child carrying a number, issue date or type.
    """
    person = emitter.make('Person')
    sanction = parse_common(emitter, person, node)
    person.add('title', values(node.find('./TITLE')))

    # The three name parts are stored individually and as one joined name.
    first, second, third = [
        node.findtext(path)
        for path in ('./FIRST_NAME', './SECOND_NAME', './THIRD_NAME')
    ]
    person.add('name', jointext(first, second, third))
    for prop, part in (('firstName', first),
                       ('secondName', second),
                       ('middleName', third)):
        person.add(prop, part)
    person.add('position', values(node.find('./DESIGNATION')))

    for alias_node in node.findall('./INDIVIDUAL_ALIAS'):
        parse_alias(person, alias_node)

    for address_node in node.findall('./INDIVIDUAL_ADDRESS'):
        parse_address(person, address_node)

    for document in node.findall('./INDIVIDUAL_DOCUMENT'):
        passport = emitter.make('Passport')
        number = document.findtext('./NUMBER')
        issued = document.findtext('./DATE_OF_ISSUE')
        kind = document.findtext('./TYPE_OF_DOCUMENT')
        # A document without number, date and type carries no identity.
        if all(field is None for field in (number, issued, kind)):
            continue
        passport.make_id(person.id, number, issued, kind)
        passport.add('holder', person)
        passport.add('passportNumber', number)
        passport.add('startDate', issued)
        passport.add('type', kind)
        passport.add('type', document.findtext('./TYPE_OF_DOCUMENT2'))
        passport.add('summary', document.findtext('./NOTE'))
        # Two different tags may hold the issuing country; prefer the first.
        issuer = (document.findtext('./COUNTRY_OF_ISSUE')
                  or document.findtext('./ISSUING_COUNTRY'))
        passport.add('country', normalize_country(issuer))
        emitter.emit(passport)

    for nationality in node.findall('./NATIONALITY/VALUE'):
        person.add('nationality', normalize_country(nationality.text))

    for birth in node.findall('./INDIVIDUAL_DATE_OF_BIRTH'):
        # Fall back to the bare year when no full date is given.
        person.add('birthDate',
                   birth.findtext('./DATE') or birth.findtext('./YEAR'))

    for birth_place in node.findall('./INDIVIDUAL_PLACE_OF_BIRTH'):
        birth_country = birth_place.findtext('./COUNTRY')
        person.add('country', normalize_country(birth_country))
        person.add('birthPlace', jointext(birth_place.findtext('./CITY'),
                                          birth_place.findtext('./STATE_PROVINCE'),
                                          birth_country,
                                          sep=', '))

    emitter.emit(person)
    emitter.emit(sanction)
Пример #2
0
def parse(context, data):
    """Emit one Person per list item of the fetched country page.

    ``data`` supplies the source ``url`` and the ``country``; the page is
    re-read through ``context.http.rehash``.  Only ``<li>`` elements inside
    the ``countryOutput`` div that have both a ``title`` span and a
    ``cos_name`` span produce an entity.
    """
    emitter = EntityEmitter(context)
    source_url = data.get('url')
    country = normalize_country(data.get('country'))
    with context.http.rehash(data) as res:
        listing = res.html.find('.//div[@id="countryOutput"]')
        if listing is None:
            # Page without the expected container: nothing to extract.
            return
        for item in listing.findall('.//li'):
            position = element_text(item.find('.//span[@class="title"]'))
            if position is None:
                continue
            full_name = element_text(item.find('.//span[@class="cos_name"]'))
            if full_name is None:
                continue

            person = emitter.make('Person')
            person.make_id(country, full_name, position)
            for prop, value in (('name', full_name),
                                ('country', country),
                                ('position', position),
                                ('sourceUrl', source_url)):
                person.add(prop, value)
            emitter.emit(person)
Пример #3
0
def parse(context, data):
    """Emit a LegalEntity and a linked Sanction per row of the debarment table.

    Only tables whose ``summary`` attribute mentions ``List of Debarred`` are
    read, and only rows with exactly six ``<td>`` cells.  The cells are, in
    order: name(s), address, country, start date, end date and program.
    Rows for which ``clean_name`` yields no name are logged and skipped.
    """
    emitter = EntityEmitter(context)
    source_url = data.get('url')
    with context.http.rehash(data) as res:
        for table in res.html.findall('.//table'):
            if 'List of Debarred' not in table.get('summary', ''):
                continue
            for row in table.findall('.//tr'):
                cells = row.findall('./td')
                if len(cells) != 6:
                    # Header rows and layout rows do not have six cells.
                    continue
                fields = [clean_value(cell) for cell in cells]
                name, address, country, start, end, program = fields

                # Check for a usable name before building any entity.
                names = clean_name(name)
                if not names:
                    context.log.warning("No name: %r", fields)
                    continue

                entity = emitter.make('LegalEntity')
                entity.make_id(*fields)
                entity.add('name', names[0])
                entity.add('address', address)
                entity.add('country', normalize_country(country))
                for alias in names[1:]:
                    entity.add('alias', alias)

                sanction = emitter.make('Sanction')
                sanction.make_id('Sanction', entity.id)
                # Link the sanction to its target, as the other parsers do;
                # without this the sanction is emitted detached.
                sanction.add('entity', entity)
                sanction.add('authority', 'World Bank Debarrment')
                sanction.add('program', program)
                sanction.add('startDate', clean_date(start))
                sanction.add('endDate', clean_date(end))
                sanction.add('sourceUrl', source_url)
                emitter.emit(entity)
                emitter.emit(sanction)
Пример #4
0
def parse(context, data):
    """Emit a LegalEntity and a linked Sanction from one debarment record.

    ``data`` is a mapping with the keys ``SUPP_NAME``, ``SUPP_ID``,
    ``DEBAR_REASON``, ``COUNTRY_NAME``, ``SUPP_CITY``, ``SUPP_ADDR``,
    ``DEBAR_FROM_DATE`` and ``DEBAR_TO_DATE``.  Records for which
    ``clean_name`` yields no name are logged and skipped instead of raising
    ``IndexError``.  The emitter is finalized in every case.
    """
    emitter = EntityEmitter(context)

    name = data.get('SUPP_NAME')
    ent_id = data.get('SUPP_ID')
    reason = data.get('DEBAR_REASON')
    country = data.get('COUNTRY_NAME')
    city = data.get('SUPP_CITY')
    address = data.get('SUPP_ADDR')
    start_date = data.get('DEBAR_FROM_DATE')
    end_date = data.get('DEBAR_TO_DATE')

    names = clean_name(name)
    if names:
        entity = emitter.make('LegalEntity')
        entity.make_id(name, ent_id, country)
        entity.add('name', names[0])
        entity.add('address', address)
        entity.add('address', city)
        entity.add('country', normalize_country(country))
        for alias in names[1:]:
            entity.add('alias', alias)

        sanction = emitter.make('Sanction')
        sanction.make_id('Sanction', entity.id)
        # Link the sanction to its target, as the other parsers do;
        # without this the sanction is emitted detached.
        sanction.add('entity', entity)
        sanction.add('authority', 'World Bank Debarrment')
        sanction.add('program', reason)
        sanction.add('startDate', clean_date(start_date))
        sanction.add('endDate', clean_date(end_date))
        sanction.add('sourceUrl', SOURCE)
        emitter.emit(entity)
        emitter.emit(sanction)
    else:
        context.log.warning("No name: %r", data)

    emitter.finalize()
Пример #5
0
def parse_address(entity, addr):
    """Add a one-line address and its country from ``addr`` to ``entity``.

    Nothing is added when the element contains no text at all.  The address
    joins note, street, city, state/province and country with ``', '``.
    """
    if not addr.xpath('string()').strip():
        return
    country = addr.findtext('./COUNTRY')
    parts = [addr.findtext(path)
             for path in ('./NOTE', './STREET', './CITY', './STATE_PROVINCE')]
    parts.append(country)
    entity.add('address', jointext(*parts, sep=', '))
    entity.add('country', normalize_country(country))
Пример #6
0
def officer(context, data):
    """Fetch one officer record and emit a Person plus its Sanctions.

    ``data['officer_id']`` is substituted into ``API_URL``.  A response whose
    status is not 200 is ignored.  Each entry of ``disqualifications``
    becomes a Sanction and contributes an address to the person.
    """
    emitter = EntityEmitter(context)
    officer_id = data.get('officer_id')
    with context.http.get(API_URL % officer_id, auth=AUTH) as res:
        if res.status_code != 200:
            return
        record = res.json
        person = emitter.make('Person')
        person.make_id(officer_id)
        self_link = record.get('links', {}).get('self', '/')
        person.add('sourceUrl', urljoin(WEB_URL, self_link))

        surname = record.pop('surname', None)
        forename = record.pop('forename', None)
        middle_names = record.pop('other_forenames', None)
        person.add('lastName', surname)
        person.add('firstName', forename)
        person.add('middleName', middle_names)
        person.add('name', jointext(forename, middle_names, surname))
        person.add('title', record.pop('title', None))

        person.add('nationality',
                   normalize_country(record.pop('nationality', None)))
        person.add('birthDate', record.pop('date_of_birth', None))

        for disqual in record.pop('disqualifications', []):
            case = disqual.get('case_identifier')
            sanction = emitter.make('Sanction')
            sanction.make_id(person.id, case)
            sanction.add('entity', person)
            sanction.add('authority', 'UK Companies House')
            sanction.add('program', case)
            sanction.add('startDate', disqual.pop('disqualified_from', None))
            sanction.add('endDate', disqual.pop('disqualified_until', None))
            emitter.emit(sanction)

            # Flatten the structured address into a single line.
            parts = disqual.pop('address', {})
            town = jointext(parts.get('locality'), parts.get('postal_code'))
            street = jointext(parts.get('address_line_1'),
                              parts.get('premises'))
            person.add('address', jointext(street,
                                           parts.get('address_line_2'),
                                           town,
                                           parts.get('region'),
                                           sep=', '))
        emitter.emit(person)
Пример #7
0
def parse_entry(emitter, entry):
    """Emit a Person built from one member listing element.

    The first link in ``entry`` provides the display name (``Last, First``)
    and the profile URL; the text after the last ``=`` in that URL is used as
    the member id.  The entry must contain exactly two ``infos`` spans: the
    role followed by the country.
    """
    anchor = entry.find('.//a')
    profile_url = urljoin(URL, anchor.get('href'))
    _, member_id = profile_url.rsplit('=', 1)
    display_name = anchor.text

    person = emitter.make('Person')
    person.make_id(member_id)
    person.add('name', display_name)
    person.add('sourceUrl', profile_url)

    surname, given_name = display_name.split(', ', 1)
    person.add('lastName', surname)
    person.add('firstName', given_name)
    person.add('position', entry.findtext('.//span[@class="fonction"]'))

    role_span, country_span = entry.findall('.//span[@class="infos"]')
    person.add('summary', role_span.text_content().strip())
    person.add('nationality',
               normalize_country(country_span.text_content().strip()))
    person.add('keywords', ['PEP', 'PACE'])
    emitter.emit(person)
Пример #8
0
def parse_entry(emitter, node):
    """Emit the entity and Sanction described by one list entry.

    An entry with an ``Entity`` element becomes a LegalEntity, otherwise a
    Person named from ``GivenName`` and ``LastName``.  The entity and its
    sanction are emitted whether or not the entry lists any ``Aliases``.
    """
    # ids are per country and entry type (individual/entity)
    country = node.findtext('./Country')
    if country is not None and ' / ' in country:
        # Keep only the part before the first " / " separator.
        country = country.split(' / ', 1)[0]
    entity_name = node.findtext('./Entity')
    item = node.findtext('.//Item')

    # Pick the schema up front rather than building a throwaway LegalEntity.
    schema = 'Person' if entity_name is None else 'LegalEntity'
    entity = emitter.make(schema)
    entity.make_id(country, entity_name, item)
    entity.add('name', entity_name)
    entity.add('country', normalize_country(country))

    sanction = emitter.make('Sanction')
    sanction.make_id(entity.id)
    sanction.add('entity', entity)
    sanction.add('authority', 'Canadian international sanctions')
    sanction.add('program', node.findtext('.//Schedule'))

    given_name = node.findtext('.//GivenName')
    entity.add('firstName', given_name, quiet=True)
    last_name = node.findtext('.//LastName')
    entity.add('lastName', last_name, quiet=True)
    entity.add('name', jointext(given_name, last_name))

    dob = node.findtext('.//DateOfBirth')
    if dob is not None:
        # Reverse the slash-separated parts and join them with dashes.
        entity.add('birthDate', '-'.join(reversed(dob.split('/'))),
                   quiet=True)

    # Aliases are optional; a missing list must not prevent the emit below.
    aliases = node.findtext('.//Aliases')
    if aliases is not None:
        for alias in aliases.split(', '):
            entity.add('alias', collapse_spaces(alias))

    emitter.emit(entity)
    emitter.emit(sanction)
Пример #9
0
def parse_node(emitter, node):
    """Emit a Person, their political organisations and the memberships.

    The national party is emitted unless it is named ``Independent``; the
    political group is always emitted, with country ``eu``.
    """
    mep_id = node.findtext('.//id')
    full_name = node.findtext('.//fullName')
    nationality = normalize_country(node.findtext('.//country'))

    person = emitter.make("Person")
    person.make_id(mep_id)
    person.add("name", full_name)
    person.add("sourceUrl", 'http://www.europarl.europa.eu/meps/en/%s' % mep_id)
    given, family = split_name(full_name)
    person.add("firstName", given)
    person.add("lastName", family)
    person.add("nationality", nationality)
    person.add("keywords", ['PEP', 'MEP'])
    emitter.emit(person)

    def emit_membership(organization):
        # Connect the person to an organisation that was just emitted.
        membership = emitter.make('Membership')
        membership.make_id(person.id, organization.id)
        membership.add('member', person)
        membership.add('organization', organization)
        emitter.emit(membership)

    party_name = node.findtext('.//nationalPoliticalGroup')
    if party_name != 'Independent':
        party = emitter.make('Organization')
        party.make_id('nationalPoliticalGroup', party_name)
        party.add('name', party_name)
        party.add('country', nationality)
        emitter.emit(party)
        emit_membership(party)

    group_name = node.findtext('.//politicalGroup')
    group = emitter.make('Organization')
    group.make_id('politicalGroup', group_name)
    group.add('name', group_name)
    group.add('country', 'eu')
    emitter.emit(group)
    emit_membership(group)
Пример #10
0
def parse_reference(emitter, reference, rows):
    """Merge all ``rows`` sharing ``reference`` into one entity and Sanction.

    The entity starts as a LegalEntity and is switched to the Person schema
    as soon as a row of type ``Individual`` is seen.  Each row is consumed
    with ``pop``, so the dictionaries in ``rows`` are modified.
    """
    entity = emitter.make('LegalEntity')
    entity.make_id(reference)
    entity.add('sourceUrl', URL)

    sanction = emitter.make('Sanction')
    sanction.make_id(entity.id)
    sanction.add(
        'authority',
        'Australian Department of Foreign Affairs and Trade Consolidated Sanctions'
    )  # noqa
    sanction.add('entity', entity)

    # Ordinal that ``control_date`` is counted from (see the offset below).
    epoch = datetime(1900, 1, 1).toordinal()

    for row in rows:
        if row.pop('type') == 'Individual':
            entity.schema = model.get('Person')

        name = row.pop('name_of_individual_or_entity', None)
        name_prop = 'alias' if row.pop('name_type') == 'aka' else 'name'
        entity.add(name_prop, name)

        entity.add('address', row.pop('address'))
        entity.add('notes', row.pop('additional_information'))
        sanction.add('program', row.pop('committees'))
        entity.add('nationality',
                   normalize_country(row.pop('citizenship')),
                   quiet=True)
        for prop, column in (('birthDate', 'date_of_birth'),
                             ('birthPlace', 'place_of_birth'),
                             ('status', 'listing_information')):
            entity.add(prop, row.pop(column), quiet=True)

        # NOTE(review): the "- 2" offset suggests ``control_date`` is a
        # spreadsheet serial day number -- confirm against the source file.
        serial = int(row.pop('control_date'))
        modified = datetime.fromordinal(epoch + serial - 2).date()
        sanction.add('modifiedAt', modified)
        entity.add('modifiedAt', modified)

    emitter.emit(entity)
    emitter.emit(sanction)
Пример #11
0
def parse_row(emitter, row):
    """Emit a LegalEntity and its Sanction from one row of the source table.

    The entity id is derived from the effective date and the name; the
    sanction id from the entity id and the ``FR_Citation`` value.
    """
    effective_date = row.get('Effective_Date')
    name = row.get('Name')
    citation = row.get('FR_Citation')

    entity = emitter.make('LegalEntity')
    entity.make_id(effective_date, name)
    entity.add('name', name)
    entity.add('notes', row.get('Action'))
    entity.add('country', normalize_country(row.get('Country')))

    address_columns = ('Street_Address', 'Postal_Code', 'City', 'State')
    address_parts = [row.get(column) for column in address_columns]
    entity.add('address', jointext(*address_parts, sep=', '))
    emitter.emit(entity)

    sanction = emitter.make('Sanction')
    sanction.make_id(entity.id, citation)
    sanction.add('entity', entity)
    sanction.add('program', citation)
    sanction.add('authority', 'US Bureau of Industry and Security')
    sanction.add('country', 'us')
    sanction.add('startDate', effective_date)
    emitter.emit(sanction)
Пример #12
0
def parse_entry(emitter, group, rows):
    """Merge all ``rows`` of one ``group`` into a single entity and Sanction.

    The entity starts as a LegalEntity and is switched to the Person schema
    when a row has ``Group Type`` equal to ``Individual``.  The first row's
    joined name becomes the entity name, later rows contribute aliases.
    Each row is consumed with ``pop``, so the dictionaries in ``rows`` are
    modified.
    """
    entity = emitter.make('LegalEntity')
    entity.make_id(group)
    sanction = emitter.make('Sanction')
    sanction.make_id(entity.id, 'Sanction')
    sanction.add('entity', entity)
    sanction.add('authority', 'HM Treasury Financial sanctions targets')
    sanction.add('country', 'gb')

    for row in rows:
        if row.pop('Group Type') == 'Individual':
            entity.schema = model.get('Person')
        row.pop('Alias Type', None)

        # "Name 1" is stored as first name and "Name 6" as last name;
        # all six parts together form the full name.
        name_parts = [row.pop(key) for key in ('Name 1', 'Name 2', 'Name 3',
                                               'Name 4', 'Name 5', 'Name 6')]
        entity.add('firstName', name_parts[0], quiet=True)
        entity.add('lastName', name_parts[-1], quiet=True)
        name = jointext(*name_parts)
        if not entity.has('name'):
            entity.add('name', name)
        else:
            entity.add('alias', name)

        entity.add('title', row.pop('Title'), quiet=True)
        sanction.add('program', row.pop('Regime'))
        last_updated = parse_date(row.pop('Last Updated'))
        sanction.add('modifiedAt', last_updated)
        sanction.add('startDate', parse_date(row.pop('Listed On')))
        entity.add('modifiedAt', last_updated)
        entity.add('position', row.pop('Position'), quiet=True)
        entity.add('notes', row.pop('Other Information'), quiet=True)
        entity.add('birthDate', parse_date(row.pop('DOB')), quiet=True)

        nationality = normalize_country(row.pop('Nationality', None))
        entity.add('nationality', nationality, quiet=True)

        country = row.pop('Country', None)
        entity.add('country', normalize_country(country))

        address_parts = [row.pop(key, None)
                         for key in ('Address 1', 'Address 2', 'Address 3',
                                     'Address 4', 'Address 5', 'Address 6',
                                     'Post/Zip Code')]
        address_parts.append(country)
        entity.add('address', jointext(*address_parts), quiet=True)

        entity.add('passportNumber', row.pop('Passport Details', None),
                   quiet=True)
        entity.add('nationalId', row.pop('NI Number', None), quiet=True)

        # Countries of birth are recorded as additional entity countries.
        for birth_country in split_items(row.pop('Country of Birth')):
            entity.add('country', normalize_country(birth_country))

        for town in split_items(row.pop('Town of Birth', None)):
            entity.add('birthPlace', town)

    emitter.emit(entity)
    emitter.emit(sanction)
Пример #13
0
def parse_entry(emitter, entry):
    """Emit the entity, its passports and its Sanction for one list entry.

    Entries whose ``type-entry`` is ``'2'`` become Persons, all others
    LegalEntities.  Every ``document-list`` child is emitted as a separate
    Passport held by the entity.
    """
    is_person = entry.findtext('./type-entry') == '2'
    entity = emitter.make('LegalEntity')
    if is_person:
        entity = emitter.make('Person')
    entity.make_id(entry.findtext('number-entry'))

    sanction = emitter.make('Sanction')
    sanction.make_id('Sanction', entity.id)
    sanction.add('entity', entity)
    sanction.add('authority', 'State Financial Monitoring Service of Ukraine')
    sanction.add(
        'sourceUrl',
        'http://www.sdfm.gov.ua/articles.php?cat_id=87&lang=en')  # noqa
    sanction.add('program', entry.findtext('./program-entry'))
    listed_on = entry.findtext('./date-entry')
    if listed_on:
        sanction.add('startDate',
                     datetime.strptime(listed_on, '%Y%m%d').date())

    name_fields = (('firstName', './aka-name1'),
                   ('secondName', './aka-name2'),
                   ('middleName', './aka-name3'),
                   ('lastName', './aka-name4'))
    for aka in entry.findall('./aka-list'):
        parts = []
        for prop, path in name_fields:
            part = aka.findtext(path)
            entity.add(prop, part, quiet=True)
            parts.append(part)
        full_name = jointext(*parts)
        # Type "N" marks the main name; quality "2" marks a weak alias.
        if aka.findtext('type-aka') == 'N':
            target = 'name'
        elif aka.findtext('./quality-aka') == '2':
            target = 'weakAlias'
        else:
            target = 'alias'
        entity.add(target, full_name)

    for title in entry.findall('./title-list'):
        entity.add('title', title.text, quiet=True)

    for document in entry.findall('./document-list'):
        registration = document.findtext('./document-reg')
        number = document.findtext('./document-id')
        issuer = normalize_country(document.findtext('./document-country'))
        passport = emitter.make('Passport')
        passport.make_id('Passport', entity.id, registration, number, issuer)
        passport.add('holder', entity)
        passport.add('passportNumber', number)
        passport.add('summary', registration)
        passport.add('country', issuer)
        emitter.emit(passport)

    for id_number in entry.findall('./id-number-list'):
        entity.add('idNumber', id_number.text)

    for address in entry.findall('./address-list'):
        entity.add('address', address.findtext('./address'))

    for birth_place in entry.findall('./place-of-birth-list'):
        entity.add('birthPlace', birth_place.text, quiet=True)

    for birth_date in entry.findall('./date-of-birth-list'):
        entity.add('birthDate', parse_date(birth_date.text), quiet=True)

    for nationality in entry.findall('./nationality-list'):
        entity.add('nationality', normalize_country(nationality.text),
                   quiet=True)

    emitter.emit(entity)
    emitter.emit(sanction)