def parse(context, data):
    """Emit a LegalEntity and a Sanction for each row of the debarment table.

    The response for ``data`` is re-loaded via ``context.http.rehash``. Only
    tables whose ``summary`` attribute contains ``List of Debarred`` are read,
    and only rows with exactly six ``<td>`` cells count as records. Rows for
    which ``clean_name`` returns no names are logged and skipped.
    """
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as res:
        debarment_tables = (
            candidate for candidate in res.html.findall('.//table')
            if 'List of Debarred' in candidate.get('summary', '')
        )
        for table in debarment_tables:
            for row in table.findall('.//tr'):
                cells = row.findall('./td')
                if len(cells) != 6:
                    # Header rows and layout rows do not have six cells.
                    continue
                values = [clean_value(cell) for cell in cells]
                raw_name, address, country, start, end, program = values

                entity = emitter.make('LegalEntity')
                # The ID is derived from every cell of the row.
                entity.make_id(*values)

                names = clean_name(raw_name)
                if len(names) == 0:
                    context.log.warning("No name: %r", values)
                    continue
                primary, aliases = names[0], names[1:]

                entity.add('name', primary)
                entity.add('address', address)
                entity.add('country', normalize_country(country))
                for alias in aliases:
                    entity.add('alias', alias)

                sanction = emitter.make('Sanction')
                sanction.make_id('Sanction', entity.id)
                sanction.add('authority', 'World Bank Debarrment')
                sanction.add('program', program)
                sanction.add('startDate', clean_date(start))
                sanction.add('endDate', clean_date(end))
                sanction.add('sourceUrl', data.get('url'))

                emitter.emit(entity)
                emitter.emit(sanction)
def parse(context, data):
    """Emit a Person for each office holder listed on a country page.

    ``data`` supplies ``url`` (recorded as the source URL) and ``country``
    (passed through ``normalize_country``). The page is re-loaded via
    ``context.http.rehash``. Nothing is emitted when the page has no
    ``<div id="countryOutput">``. Within that div, every ``<li>`` that has
    both a ``title`` span and a ``cos_name`` span becomes one Person.
    """
    emitter = EntityEmitter(context)
    source_url = data.get('url')
    country = normalize_country(data.get('country'))

    with context.http.rehash(data) as res:
        listing = res.html.find('.//div[@id="countryOutput"]')
        if listing is None:
            return

        for item in listing.findall('.//li'):
            position = element_text(item.find('.//span[@class="title"]'))
            if position is None:
                continue
            holder = element_text(item.find('.//span[@class="cos_name"]'))
            if holder is None:
                continue

            person = emitter.make('Person')
            # Country, name and position together identify the record.
            person.make_id(country, holder, position)
            person.add('name', holder)
            person.add('country', country)
            person.add('position', position)
            person.add('sourceUrl', source_url)
            emitter.emit(person)
def parse(context, data):
    """Emit a LegalEntity and its Sanction from one debarment record.

    ``data`` is read as a mapping with the keys ``SUPP_NAME``, ``SUPP_ID``,
    ``SUPP_ADDR``, ``SUPP_CITY``, ``COUNTRY_NAME``, ``DEBAR_REASON``,
    ``DEBAR_FROM_DATE`` and ``DEBAR_TO_DATE``; missing keys read as ``None``.

    When ``clean_name`` yields no usable name, the record is logged and
    skipped rather than failing with an IndexError on ``names[0]``. The
    emitter is finalized on both paths.
    """
    emitter = EntityEmitter(context)
    raw_name = data.get('SUPP_NAME')
    country = data.get('COUNTRY_NAME')

    names = clean_name(raw_name)
    if not names:
        # Without a name there is no entity worth emitting.
        context.log.warning("No name: %r", data)
        emitter.finalize()
        return

    entity = emitter.make('LegalEntity')
    entity.make_id(raw_name, data.get('SUPP_ID'), country)
    entity.add('name', names[0])
    # Street address and city are both stored as 'address' values.
    entity.add('address', data.get('SUPP_ADDR'))
    entity.add('address', data.get('SUPP_CITY'))
    entity.add('country', normalize_country(country))
    for alias in names[1:]:
        entity.add('alias', alias)

    sanction = emitter.make('Sanction')
    sanction.make_id('Sanction', entity.id)
    sanction.add('authority', 'World Bank Debarrment')
    sanction.add('program', data.get('DEBAR_REASON'))
    sanction.add('startDate', clean_date(data.get('DEBAR_FROM_DATE')))
    sanction.add('endDate', clean_date(data.get('DEBAR_TO_DATE')))
    sanction.add('sourceUrl', SOURCE)

    emitter.emit(entity)
    emitter.emit(sanction)
    emitter.finalize()
def parse_notice(context, data):
    """Emit a Person built from one notice's JSON document.

    The response for ``data`` is re-loaded via ``context.http.rehash`` and
    its JSON body is read. A null forename or name is treated as an empty
    string. Each arrest warrant adds its issuing country as ``program`` and
    its charge as ``summary``. The entity is tagged with the keywords
    ``REDNOTICE`` and ``CRIME``.
    """
    with context.http.rehash(data) as res:
        notice = res.json
        forename = notice['forename'] or ''
        surname = notice['name'] or ''
        birth_date = notice['date_of_birth']
        nationalities = notice['nationalities']
        birth_place = notice['place_of_birth']
        # (charge, issuing country) pairs, one per warrant.
        warrants = []
        for warrant in notice['arrest_warrants']:
            warrants.append((warrant['charge'], warrant['issuing_country_id']))
        # Unknown sex ids map to None rather than raising.
        gender = SEXES.get(notice['sex_id'])

        emitter = EntityEmitter(context)
        person = emitter.make('Person')
        person.make_id(forename, surname, notice['entity_id'])
        person.add('name', ' '.join((forename, surname)))
        person.add('firstName', forename)
        person.add('lastName', surname)
        person.add('nationality', nationalities)
        for charge, issuing_country in warrants:
            person.add('program', issuing_country)
            person.add('summary', charge)
        person.add('gender', gender)
        person.add('birthPlace', birth_place)
        person.add('birthDate', parse_date(birth_date))
        person.add('sourceUrl', notice['_links']['self']['href'])
        for keyword in ('REDNOTICE', 'CRIME'):
            person.add('keywords', keyword)
        emitter.emit(person)
    emitter.finalize()
def officer(context, data):
    """Emit a Person plus one Sanction per disqualification for an officer.

    ``data['officer_id']`` is substituted into ``API_URL`` and fetched with
    ``AUTH``. Any response whose status is not 200 is dropped silently.
    Each disqualification yields a Sanction linked to the person and adds
    the disqualification's formatted address to the person.
    """
    emitter = EntityEmitter(context)
    officer_id = data.get('officer_id')
    with context.http.get(API_URL % officer_id, auth=AUTH) as res:
        if res.status_code != 200:
            return
        record = res.json

        person = emitter.make('Person')
        person.make_id(officer_id)
        self_link = record.get('links', {}).get('self', '/')
        person.add('sourceUrl', urljoin(WEB_URL, self_link))

        surname = record.pop('surname', None)
        forename = record.pop('forename', None)
        middle_names = record.pop('other_forenames', None)
        person.add('lastName', surname)
        person.add('firstName', forename)
        person.add('middleName', middle_names)
        person.add('name', jointext(forename, middle_names, surname))
        person.add('title', record.pop('title', None))
        person.add(
            'nationality',
            normalize_country(record.pop('nationality', None)),
        )
        person.add('birthDate', record.pop('date_of_birth', None))

        for disqualification in record.pop('disqualifications', []):
            case_id = disqualification.get('case_identifier')
            starts = disqualification.pop('disqualified_from', None)
            ends = disqualification.pop('disqualified_until', None)

            sanction = emitter.make('Sanction')
            sanction.make_id(person.id, case_id)
            sanction.add('entity', person)
            sanction.add('authority', 'UK Companies House')
            sanction.add('program', case_id)
            sanction.add('startDate', starts)
            sanction.add('endDate', ends)
            emitter.emit(sanction)

            # Flatten the structured address into one comma-separated line.
            parts = disqualification.pop('address', {})
            town = jointext(parts.get('locality'), parts.get('postal_code'))
            street = jointext(
                parts.get('address_line_1'),
                parts.get('premises'),
            )
            person.add('address', jointext(
                street,
                parts.get('address_line_2'),
                town,
                parts.get('region'),
                sep=', ',
            ))
        emitter.emit(person)
def parse(context, data):
    """Emit a Person from the JSON notice found at ``data['url']``.

    The URL is fetched and its body decoded as JSON. Nothing is emitted when
    the joined forename/name is missing or equals ``'Identity unknown'``.
    A name of the form ``"Last, First"`` also yields a ``"First Last"``
    alias. All arrest warrants are folded into a single summary, one
    ``[country] charge`` entry per line.

    Unknown or missing sex ids are recorded as ``None`` rather than raising
    KeyError, and a null ``arrest_warrants`` value is treated as no warrants.
    """
    emitter = EntityEmitter(context)
    url = data.get('url')
    context.log.info("Interpol Red Notices URL: %s", url)
    with context.http.get(url) as result:
        context.log.info("result.text=%s", result.text)
        # Named 'notice' so the builtin ``dict`` is not shadowed.
        notice = json.loads(result.text)

        name = jointext(notice['forename'], notice['name'])
        if name is None or name == 'Identity unknown':
            return

        entity = emitter.make('Person')
        entity.make_id(url)
        entity.add('name', name)
        entity.add('sourceUrl', url)
        entity.add('description', notice['distinguishing_marks'])
        entity.add('keywords', 'REDNOTICE')
        entity.add('keywords', 'CRIME')

        if ', ' in name:
            last, first = name.split(', ', 1)
            entity.add('alias', jointext(first, last))

        summary = ''
        # ``or []`` guards against a JSON null in place of the list.
        for warrant in notice['arrest_warrants'] or []:
            issuing_country = jointext(
                '[', warrant['issuing_country_id'], ']', sep='')
            charge = jointext(issuing_country, warrant['charge'])
            summary = jointext(summary, charge, sep='\r\n')
        entity.add('summary', summary)

        entity.add('lastName', notice['name'])
        entity.add('firstName', notice['forename'])
        entity.add('nationality', notice['nationalities'])
        # .get: an unrecognised sex id must not abort the whole notice.
        entity.add('gender', SEXES.get(notice['sex_id']))
        entity.add('birthDate', notice['date_of_birth'])
        entity.add('birthPlace', notice['place_of_birth'])
        emitter.emit(entity)
def parse(context, data):
    """Emit a Person scraped from a notice's HTML detail page.

    The response for ``data`` is re-loaded via ``context.http.rehash``.
    Nothing is emitted (and the emitter is not finalized) when the headline
    name is missing or equals ``'Identity unknown'``. A name of the form
    ``"Last, First"`` also yields a ``"First Last"`` alias. Each two-cell
    row of the ``bloc_detail`` table is mapped to an entity property by its
    slugified label.

    Rows without exactly two cells are skipped instead of raising
    ValueError, and sex values absent from ``SEXES`` are recorded as
    ``None`` instead of raising KeyError.
    """
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as result:
        doc = result.html
        name = element_text(doc.find('.//div[@class="nom_fugitif_wanted"]'))
        if name is None or name == 'Identity unknown':
            return

        entity = emitter.make('Person')
        entity.make_id(data.get('url'))
        entity.add('name', name)
        entity.add('sourceUrl', data.get('url'))
        wanted = element_text(
            doc.find('.//span[@class="nom_fugitif_wanted_small"]'))  # noqa
        entity.add('program', wanted)
        entity.add('keywords', 'REDNOTICE')
        entity.add('keywords', 'CRIME')

        if ', ' in name:
            last, first = name.split(', ', 1)
            entity.add('alias', jointext(first, last))

        for row in doc.findall('.//div[@class="bloc_detail"]//tr'):
            cells = row.findall('./td')
            if len(cells) != 2:
                # Not a label/value pair (e.g. a header or spacer row).
                continue
            label_cell, value_cell = cells
            field = slugify(element_text(label_cell), sep='_')
            value = element_text(value_cell)
            if value is None:
                continue

            if field == 'charges':
                entity.add('summary', value)
            elif field == 'present_family_name':
                entity.add('lastName', value)
            elif field == 'forename':
                entity.add('firstName', value)
            elif field == 'nationality':
                for country in value.split(', '):
                    entity.add('nationality', country)
            elif field == 'sex':
                entity.add('gender', SEXES.get(value))
            elif field == 'date_of_birth':
                # Keep only the text before any parenthesised suffix.
                entity.add('birthDate', value.split('(')[0].strip())
            elif field == 'place_of_birth':
                entity.add('birthPlace', value)
        emitter.emit(entity)
    emitter.finalize()