Exemplo n.º 1
0
def crawl_company(context: Context) -> None:
    """Emit one ``Organization`` per row of the "company" JSON resource.

    Each row is passed through ``clean_row``; its fields are then popped off
    and mapped onto entity properties. Every entity is tagged with the
    ``sanction`` topic and emitted as a target.
    """
    data = json_resource(context, context.dataset.data.url, "company")
    for row in data["data"]:
        row = clean_row(row)
        company_id = row.pop("company_id")
        name_en = row.pop("name_en", None)
        name = row.pop("name", None) or name_en
        # Pop the OGRN exactly once. It serves both as a fallback ID component
        # and as a property; popping it in both places meant the property was
        # silently dropped whenever the fallback ID was used.
        ogrn = row.pop("ogrn", None)

        entity = context.make("Organization")
        entity.id = context.make_slug("company", company_id, name)
        if entity.id is None:
            # The name-based slug could not be built: fall back to a
            # non-strict slug that uses the OGRN instead.
            entity.id = context.make_slug(
                "company",
                company_id,
                ogrn,
                strict=False,
            )
        entity.add("name", name)
        entity.add("name", name_en)
        entity.add("name", row.pop("name_uk", None))
        entity.add("name", row.pop("name_ru", None))
        entity.add("innCode", row.pop("inn", None))
        entity.add_cast("Company", "ogrnCode", ogrn)

        country = row.pop("country", None)
        # NOTE(review): direct indexing fails for any value that is not a key
        # of COUNTRIES (including a missing country) -- confirm this is the
        # intended "fail loudly" behaviour.
        entity.add("country", COUNTRIES[country])
        entity.add("topics", "sanction")
        entity.add("notes", row.pop("reasoning_en", None))
        entity.add("notes", row.pop("reasoning_ru", None))
        entity.add("notes", row.pop("reasoning_uk", None))

        context.emit(entity, target=True)
        row.pop("logo_en", None)
Exemplo n.º 2
0
def parse_entry(context: Context, entry, parties):
    """Attach the sanction details of one list entry to its party, emit both.

    The party is looked up in ``parties`` under the slug made from the
    entry's ``ProfileID`` attribute; a missing key raises ``KeyError``.
    """
    profile_slug = context.make_slug(entry.get("ProfileID"))
    party = parties[profile_slug]

    sanction = h.make_sanction(context, party, key=entry.get("ID"))
    list_name = ref_value("List", entry.get("ListID"))
    sanction.add("program", list_name)

    # Entry events carry the listing date, a comment and the legal basis.
    for evt in entry.findall("./EntryEvent"):
        party.add("createdAt", parse_date(evt.find("./Date")))
        sanction.add("summary", evt.findtext("./Comment"))
        legal_basis = ref_value("LegalBasis", evt.get("LegalBasisID"))
        sanction.add("reason", legal_basis)

    party.add("topics", "sanction")
    # Sanction dates are copied from whatever the party holds at this point.
    sanction.add("listingDate", party.get("createdAt"))
    sanction.add("startDate", party.get("modifiedAt"))

    # Each measure contributes a comment and its sanctions type.
    for measure in entry.findall("./SanctionsMeasure"):
        sanction.add("summary", measure.findtext("./Comment"))
        measure_type = ref_value(
            "SanctionsType", measure.get("SanctionsTypeID")
        )
        sanction.add("program", measure_type)

    context.emit(sanction)
    context.emit(party, target=True)
Exemplo n.º 3
0
def parse_party(context: Context, distinct_party, locations, documents):
    """Build, populate and emit the entity for one distinct-party node.

    Returns the emitted entity, or ``None`` (after logging an error) when
    neither the party type nor the party sub-type maps to a schema in
    ``TYPES``.
    """
    profile = distinct_party.find("Profile")
    sub_type = ref_get("PartySubType", profile.get("PartySubTypeID"))
    type_ = ref_value("PartyType", sub_type.get("PartyTypeID"))
    # The party type wins; the sub-type's schema is only the fallback.
    fallback_schema = TYPES.get(sub_type.get("Value"))
    schema = TYPES.get(type_, fallback_schema)
    if schema is None:
        context.log.error("Unknown party type", value=type_)
        return

    profile_id = profile.get("ID")
    party = context.make(schema)
    party.id = context.make_slug(profile_id)
    party.add("notes", h.clean_note(distinct_party.findtext("Comment")))
    party.add("sourceUrl", URL % profile_id)

    for identity in profile.findall("./Identity"):
        # Map each name-part group ID to the name of its part type.
        parts = {
            group.get("ID"): ref_value(
                "NamePartType", group.get("NamePartTypeID")
            )
            for group in identity.findall(".//NamePartGroup")
        }

        for alias in identity.findall("./Alias"):
            parse_alias(party, parts, alias)

        identity_docs = documents.get(identity.get("ID"), [])
        for regdoc in identity_docs:
            parse_registration_doc(context, party, regdoc)

    for feature in profile.findall("./Feature"):
        parse_feature(context, feature, party, locations)

    context.emit(party, target=True)
    return party
Exemplo n.º 4
0
def crawl(context: Context):
    """Fetch the JSON feed and emit one debarred entity plus its sanction
    for every record under ``response.ZPROCSUPP``.
    """
    source = context.dataset.data
    payload = context.fetch_json(
        source.url,
        headers={"apikey": source.api_key},
    )
    # TODO write this out to a source.json
    for record in payload["response"]["ZPROCSUPP"]:
        entity = context.make("LegalEntity")
        raw_name = record.get("SUPP_NAME")
        supplier_id = record.get("SUPP_ID")
        entity.id = context.make_slug(supplier_id)

        # The first cleaned name is the primary one; the rest are aliases.
        names = clean_name(raw_name)
        entity.add("name", names[0])
        entity.add("topics", "debarment")
        entity.add("country", record.get("COUNTRY_NAME"))
        for alias in names[1:]:
            entity.add("alias", alias)

        address = h.make_address(
            context,
            street=record.get("SUPP_ADDR"),
            city=record.get("SUPP_CITY"),
            country=record.get("COUNTRY_NAME"),
            key=entity.id,
        )
        h.apply_address(context, entity, address)

        sanction = h.make_sanction(context, entity)
        sanction.add("program", record.get("DEBAR_REASON"))
        start_date = h.parse_date(record.get("DEBAR_FROM_DATE"), FORMATS)
        sanction.add("startDate", start_date)
        end_date = h.parse_date(record.get("DEBAR_TO_DATE"), FORMATS)
        sanction.add("endDate", end_date)

        context.emit(entity, target=True)
        context.emit(sanction)
Exemplo n.º 5
0
def crawl_person(context: Context) -> None:
    """Emit one ``Person`` per row of the "person" JSON resource.

    The primary name is the first non-empty of the English, Russian and
    Ukrainian names; the Russian and Ukrainian names are also added as
    aliases. Every person is tagged ``sanction`` and emitted as a target.
    """
    url = "https://sanctions.nazk.gov.ua/sanction-person/%s/"
    data = json_resource(context, context.dataset.data.url, "person")
    for raw_row in data["data"]:
        row = clean_row(raw_row)
        person_id = row.pop("person_id")
        name_en = row.pop("name_en", None)
        name_ru = row.pop("name_ru", None)
        name_uk = row.pop("name_uk", None)
        name = name_en or name_ru or name_uk

        entity = context.make("Person")
        entity.id = context.make_slug("person", person_id, name)
        entity.add("name", name)
        for alias in (name_ru, name_uk):
            entity.add("alias", alias)
        entity.add("birthDate", parse_date(row.pop("date_bd", None)))
        entity.add("sourceUrl", url % person_id)

        # An English birth city of "N/A" means no birth place is recorded
        # from any of the three language fields.
        if row.get("city_bd_en") != "N/A":
            for key in ("city_bd_en", "city_bd_ru", "city_bd_uk"):
                entity.add("birthPlace", row.pop(key, None))
        for key in ("position_en", "position_ru", "position_uk"):
            entity.add("position", row.pop(key, None))
        for key in ("reasoning_en", "reasoning_ru", "reasoning_uk"):
            entity.add("notes", row.pop(key, None))

        country = row.get("country", None)
        entity.add("country", COUNTRIES[country])
        entity.add("topics", "sanction")
        context.emit(entity, target=True)
Exemplo n.º 6
0
def parse_row(context: Context, row):
    """Emit a ``LegalEntity`` (with address) and its sanction for one row.

    The entity ID is derived from the row's effective date and name; the
    sanction is keyed on the row's ``FR_Citation`` value.
    """
    name = row.get("Name")
    country = row.get("Country")
    effective_date = row.get("Effective_Date")

    entity = context.make("LegalEntity")
    entity.id = context.make_slug(effective_date, name)
    entity.add("name", name)
    entity.add("notes", row.get("Action"))
    entity.add("country", country)
    entity.add("modifiedAt", row.get("Last_Update"))

    address_fields = {
        "street": row.get("Street_Address"),
        "postal_code": row.get("Postal_Code"),
        "city": row.get("City"),
        "region": row.get("State"),
        "country": country,
    }
    address = h.make_address(context, **address_fields)
    h.apply_address(context, entity, address)
    context.emit(entity, target=True)

    citation = row.get("FR_Citation")
    sanction = h.make_sanction(context, entity, key=citation)
    sanction.add("program", citation)
    sanction.add("startDate", h.parse_date(effective_date, FORMATS))
    expiration_date = row.get("Expiration_Date")
    sanction.add("endDate", h.parse_date(expiration_date, FORMATS))
    context.emit(sanction)
Exemplo n.º 7
0
def crawl_common(context: Context, data: Dict[str, str], part: str,
                 schema: str):
    """Create an entity of ``schema`` from the fields shared by all records.

    Consumed keys are popped from ``data``. The accompanying sanction is
    emitted here; the entity itself is returned un-emitted to the caller.
    """
    entity = context.make(schema)
    entity.id = context.make_slug(part, data.pop("DATAID"))
    entity.add("topics", "sanction")
    comments = data.pop("COMMENTS1")
    entity.add("notes", h.clean_note(comments))
    note = data.pop("NOTE", None)
    entity.add("notes", h.clean_note(note))
    entity.add("alias", data.pop("NAME_ORIGINAL_SCRIPT"))

    name_parts = {
        "name1": data.pop("FIRST_NAME", None),
        "name2": data.pop("SECOND_NAME", None),
        "name3": data.pop("THIRD_NAME", None),
        "name4": data.pop("FOURTH_NAME", None),
    }
    h.apply_name(entity, quiet=True, **name_parts)

    sanction = h.make_sanction(context, entity)
    submitted_on = parse_date(data.pop("SUBMITTED_ON", None))
    listed_on = parse_date(data.pop("LISTED_ON"))
    modified_at = parse_date(data.pop("LAST_DAY_UPDATED"))

    # Earliest known date: submission, else listing, else last update.
    first_seen = submitted_on or listed_on or modified_at
    entity.add("createdAt", first_seen)
    entity.add("modifiedAt", modified_at)

    sanction.add("listingDate", submitted_on or listed_on)
    sanction.add("startDate", listed_on)
    for key in ("UN_LIST_TYPE", "LIST_TYPE"):
        sanction.add("program", data.pop(key))
    sanction.add("unscId", data.pop("REFERENCE_NUMBER"))
    sanction.add("authority", data.pop("SUBMITTED_BY", None))
    context.emit(sanction)
    return entity
Exemplo n.º 8
0
def crawl(context: Context):
    """Parse the sanctions tables of the source HTML page and emit entities.

    Within each table the first row supplies the column headers, a row with
    a single cell selects the schema (via ``TYPES``) for the rows after it,
    and every other row becomes one entity plus one sanction.
    """
    path = context.fetch_resource("source.html", context.dataset.data.url)
    context.export_resource(path, HTML, title=context.SOURCE_TITLE)
    with open(path, "r", encoding="utf-8") as fh:
        doc = html.fromstring(fh.read())

    for table in doc.findall('.//div[@class="editor-content"]//table'):
        headers = None
        schema = None
        for tr in table.findall(".//tr"):
            cells = []
            for td in tr.findall("./td"):
                cells.append(collapse_spaces(td.text_content()))
            if headers is None:
                headers = [slugify(cell, sep="_") for cell in cells]
                continue
            if len(cells) == 1:
                schema = TYPES[cells[0]]
                continue
            record = dict(zip(headers, cells))

            entity = context.make(schema)
            full_name = record.pop("imie_i_nazwisko_nazwa_podmiotu")
            entity.id = context.make_slug(full_name)
            # Text before the first "(" is the name; each later segment
            # contributes the text up to its ")" as an alias.
            primary, *bracketed = full_name.split("(")
            entity.add("name", primary)
            for segment in bracketed:
                entity.add("alias", segment.split(")")[0])
            entity.add("notes", record.pop("uzasadnienie_wpisu_na_liste"))

            details = record.pop("dane_identyfikacyjne_osoby_podmiotu")
            # Peel known suffixes off the end of the details text, one
            # marker at a time, in the order given by CHOPSKA.
            for marker, prop in CHOPSKA:
                pieces = details.rsplit(marker, 1)
                details = pieces[0]
                if len(pieces) == 1:
                    continue
                value = pieces[1]
                if prop == "address":
                    addr = h.make_address(context, full=value)
                    h.apply_address(context, entity, addr)
                else:
                    entity.add(prop, value)

            # Whatever is left over is resolved through the "details" lookup.
            if details.strip():
                result = context.lookup("details", details)
                if result is None:
                    context.log.warning("Unhandled details", details=details)
                else:
                    for prop, value in result.props.items():
                        entity.add(prop, value)

            sanction = h.make_sanction(context, entity)
            sanction.add(
                "provisions", record.pop("zastosowane_srodki_sankcyjne")
            )

            listed = record.pop("data_umieszczenia_na_liscie")
            listed = listed.replace(" r.", "")
            sanction.add("startDate", h.parse_date(listed, ["%d.%m.%Y"]))

            h.audit_data(record)
            context.emit(entity, target=True)
            context.emit(sanction)
Exemplo n.º 9
0
def parse_person(context: Context, data, country, lastmod):
    """Emit a ``Person`` tagged ``role.pep`` built from one person record.

    Args:
        context: crawler context used to create and emit the entity.
        data: mapping describing the person; consumed keys are popped.
        country: value stored as the person's nationality.
        lastmod: object whose ``date()`` is stored as ``modifiedAt``.

    Returns:
        The ID of the emitted person, or ``None`` when the record is skipped
        (missing or "unknown" name, or ``check_person_cutoff`` is truthy).
    """
    person_id = data.pop("id", None)
    person = context.make("Person")
    person.id = context.make_slug(person_id)
    person.add("nationality", country)
    name = data.get("name")
    if name is None or name.lower().strip() in ("unknown", ):
        return
    person.add("modifiedAt", lastmod.date())
    person.add("name", data.pop("name", None))
    person.add("alias", data.pop("sort_name", None))
    for other in data.pop("other_names", []):
        person.add("alias", other.get("name"))
    person.add("gender", data.pop("gender", None))
    person.add("title", data.pop("honorific_prefix", None))
    person.add("title", data.pop("honorific_suffix", None))
    person.add("firstName", data.pop("given_name", None))
    person.add("lastName", data.pop("family_name", None))
    person.add("fatherName", data.pop("patronymic_name", None))
    person.add("birthDate", data.pop("birth_date", None))
    person.add("deathDate", data.pop("death_date", None))
    person.add("email", h.clean_emails(data.pop("email", None)))
    person.add("notes", data.pop("summary", None))
    person.add("topics", "role.pep")

    # Only links with a recognised note are kept; all others (Wikipedia
    # links included) are currently ignored.
    for link in data.pop("links", []):
        url = link.get("url")
        if link.get("note") in ("website", "blog", "twitter", "facebook"):
            person.add("website", url)

    for ident in data.pop("identifiers", []):
        identifier = ident.get("identifier")
        scheme = ident.get("scheme")
        # Guard against entries that have a scheme but no identifier value,
        # which previously raised AttributeError on ``None.startswith``.
        if (
            scheme == "wikidata"
            and isinstance(identifier, str)
            and identifier.startswith("Q")
        ):
            person.add("wikidataId", identifier)

    for contact_detail in data.pop("contact_details", []):
        value = contact_detail.get("value")
        if "email" == contact_detail.get("type"):
            person.add("email", h.clean_emails(value))
        if "phone" == contact_detail.get("type"):
            person.add("phone", h.clean_phones(value))

    if check_person_cutoff(person):
        return

    context.emit(person, target=True)
    return person.id
Exemplo n.º 10
0
def parse_reference(context: Context, reference: int, rows):
    """Merge all rows that share one reference number into a single entity.

    ``rows`` is iterated three times (schema, names, details), so it must be
    re-iterable. Nothing is emitted if a row has an unknown entity type or
    name type; rows disagreeing on the schema trip the assertion.
    """
    # Pass 1: every row must resolve to the same schema.
    schemata = set()
    for row in rows:
        raw_type = row.pop("type")
        schema = context.lookup_value("type", raw_type)
        if schema is None:
            context.log.warning("Unknown entity type", type=raw_type)
            return
        schemata.add(schema)
    assert len(schemata) == 1, schemata
    entity = context.make(schemata.pop())

    # Pass 2: names. The last row mapped to "name" supplies the ID's name.
    primary_name = None
    for row in rows:
        name = row.pop("name_of_individual_or_entity", None)
        name_type = row.pop("name_type")
        name_prop = context.lookup_value("name_type", name_type)
        if name_prop is None:
            context.log.warning("Unknown name type", name_type=name_type)
            return
        entity.add(name_prop, name)
        if name_prop == "name":
            primary_name = name

    entity.id = context.make_slug(reference, primary_name)
    sanction = h.make_sanction(context, entity)

    # Pass 3: addresses, personal details and listing information.
    for row in rows:
        addr = row.pop("address")
        if addr is not None:
            for part in multi_split(addr, SPLITS):
                h.apply_address(
                    context, entity, h.make_address(context, full=part)
                )
        sanction.add("program", row.pop("committees"))
        citizenship = row.pop("citizenship")
        nationalities = multi_split(citizenship, ["a)", "b)", "c)", "d)"])
        entity.add("nationality", nationalities, quiet=True)
        birth_dates = clean_date(row.pop("date_of_birth"))
        entity.add("birthDate", birth_dates, quiet=True)
        entity.add("birthPlace", row.pop("place_of_birth"), quiet=True)
        entity.add("notes", h.clean_note(row.pop("additional_information")))

        # A datetime is treated as the listing date; any other value is kept
        # as free text on the sanction.
        # TODO: consider parsing if it's not a datetime?
        listing_info = row.pop("listing_information")
        if not isinstance(listing_info, datetime):
            sanction.add("summary", listing_info)
        else:
            entity.add("createdAt", listing_info)
            sanction.add("listingDate", listing_info)

        control_date = row.pop("control_date")
        sanction.add("startDate", control_date)
        entity.add("createdAt", control_date)

    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    context.emit(sanction)
Exemplo n.º 11
0
def crawl_node(context: Context, node):
    """Emit the person described by ``node`` plus party/group memberships.

    The person is tagged ``role.pep`` and emitted as a target. The national
    party (unless it is "Independent") and the political group are each
    emitted as an ``Organization`` with a ``Membership`` linking the person
    to it, provided a slug can be built for the organization.
    """
    mep_id = node.findtext(".//id")
    full_name = node.findtext(".//fullName")
    country = node.findtext(".//country")

    person = context.make("Person")
    person.id = context.make_slug(mep_id)
    person.add(
        "sourceUrl", "http://www.europarl.europa.eu/meps/en/%s" % mep_id
    )
    person.add("name", full_name)
    first_name, last_name = split_name(full_name)
    person.add("firstName", first_name)
    person.add("lastName", last_name)
    person.add("nationality", country)
    person.add("topics", "role.pep")
    context.emit(person, target=True)

    def emit_with_membership(organization):
        # Emit the organization, then the membership tying the person to it.
        context.emit(organization)
        membership = context.make("Membership")
        membership.id = context.make_id(person.id, organization.id)
        membership.add("member", person)
        membership.add("organization", organization)
        context.emit(membership)

    party_name = node.findtext(".//nationalPoliticalGroup")
    if party_name != "Independent":
        party = context.make("Organization")
        party.id = context.make_slug("npg", party_name)
        if party.id is not None:
            party.add("name", party_name)
            party.add("country", country)
            emit_with_membership(party)

    group_name = node.findtext(".//politicalGroup")
    group = context.make("Organization")
    group.id = context.make_slug("pg", group_name)
    if group.id is not None:
        group.add("name", group_name)
        group.add("country", "eu")
        emit_with_membership(group)
Exemplo n.º 12
0
def crawl(context: Context):
    """Emit one sanctioned ``Company`` or ``Person`` per source record.

    For each record the named bank is emitted as a ``Company`` (when an ID
    could be built for it). The target is a ``Company`` when the record has
    a company name and a ``Person`` otherwise; it is emitted together with
    its address and a sanction.
    """
    path = context.fetch_resource("source.json", context.dataset.data.url)
    context.export_resource(path, JSON, title=context.SOURCE_TITLE)
    # JSON text is UTF-8; read it explicitly as such instead of relying on
    # the platform's locale-dependent default encoding.
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    for record in data:
        bank = context.make("Company")
        charter_no = record.pop("CharterNumber")
        bank_name = record.pop("BankName")
        bank.id = context.make_slug(charter_no, bank_name)
        bank.add("name", bank_name)
        bank.add("registrationNumber", charter_no)
        bank.add("country", "us")
        bank.add("topics", "fin.bank")
        if bank.id is not None:
            context.emit(bank)
        company_name = record.pop("CompanyName")
        first_name = record.pop("FirstName")
        last_name = record.pop("LastName")
        if company_name:
            entity = context.make("Company")
            entity.id = context.make_id(charter_no, bank_name, company_name)
            entity.add("name", company_name)
        else:
            entity = context.make("Person")
            entity.id = context.make_id(charter_no, bank_name, first_name,
                                        last_name)
            h.apply_name(entity, first_name=first_name, last_name=last_name)
        entity.add("country", "us")
        entity.add("topics", "crime.fin")

        addr = h.make_address(
            context,
            city=record.pop("CityName"),
            state=record.pop("StateName"),
            country_code="us",
        )
        # The abbreviation is consumed but not used.
        record.pop("StateAbbreviation")
        h.apply_address(context, entity, addr)

        sanction = h.make_sanction(context, entity)
        sanction.add("startDate", record.pop("CompleteDate", None))
        sanction.add("endDate", record.pop("TerminationDate", None))
        sanction.add("program", record.pop("EnforcementTypeDescription", None))
        sanction.add("authorityId", record.pop("DocketNumber", None))
        context.emit(entity, target=True)
        context.emit(sanction)
Exemplo n.º 13
0
def crawl(context: Context):
    """Parse the listed persons out of the source HTML and emit them.

    Only ``td.tailSTxt`` cells whose text starts with "2." are processed.
    Each table row inside yields one ``Person`` and one sanction; the
    bracketed terms in the row text are mapped onto entity properties.
    """
    path = context.fetch_resource("source.html", context.dataset.data.url)
    context.export_resource(path, HTML, title=context.SOURCE_TITLE)
    with open(path, "r") as fh:
        doc = html.parse(fh)

    for section in doc.findall(".//td[@class='tailSTxt']"):
        if not section.text_content().startswith("2."):
            continue
        for item in section.findall(".//tr"):
            number_cell = item.find(".//td[@class='sProvP1No']")
            number = number_cell.text_content()
            text = item.findtext(".//td[@class='sProvP1']")
            text = text.strip().rstrip(";").rstrip(".")
            # Everything before the first "(" is the name portion.
            name, _rest = text.split("(", 1)
            names = multi_split(name, ["s/o", "@"])

            entity = context.make("Person")
            entity.id = context.make_slug(number, name)
            entity.add("name", names)
            entity.add("topics", "sanction")

            sanction = h.make_sanction(context, entity)
            sanction.add("program", PROGRAM)

            for term in IN_BRACKETS.findall(text):
                # A configured lookup takes priority over the built-in
                # patterns below.
                res = context.lookup("props", term)
                if res is not None:
                    for prop, value in res.props.items():
                        entity.add(prop, value)
                elif term.endswith("citizen"):
                    entity.add("nationality", term.replace("citizen", ""))
                elif term.startswith(DOB):
                    dob = term.replace(DOB, "").strip()
                    entity.add("birthDate", h.parse_date(dob, ["%d %B %Y"]))
                elif term.startswith(PASSPORT):
                    passport = term.replace(PASSPORT, "").strip()
                    entity.add("passportNumber", passport)
                else:
                    context.log.warn("Unparsed bracket term", term=term)

            context.emit(entity, target=True)
            context.emit(sanction)
Exemplo n.º 14
0
def parse_entry(context: Context, node: _Element):
    """Emit the sanctioned entity described by one XML entry and its sanction.

    An entry with an ``Entity`` element becomes a ``LegalEntity`` (names are
    split on "/"); otherwise it becomes a ``Person`` named from
    ``GivenName`` and ``LastName``. The ``Country`` element is used as the
    sanction program, and its part before the first "/" as the country.
    """
    entity_name = node.findtext("./Entity")
    dob = node.findtext("./DateOfBirth")
    schedule = node.findtext("./Schedule")
    if schedule == "N/A":
        schedule = ""
    program = node.findtext("./Country")
    item = node.findtext("./Item")
    if entity_name is not None:
        entity = context.make("LegalEntity")
        entity.add("name", entity_name.split("/"))
    else:
        entity = context.make("Person")
        given_name = node.findtext("./GivenName")
        last_name = node.findtext("./LastName")
        entity_name = h.make_name(given_name=given_name, last_name=last_name)
        entity.add("name", entity_name)
        entity.add("birthDate", dob)

    country = program
    if program is not None and "/" in program:
        # Keep only the part before the first slash. Limiting the split
        # avoids the ValueError that tuple-unpacking an unlimited split
        # raised for values containing more than one "/".
        country = program.split("/", 1)[0]
    entity.add("country", country)

    entity.id = context.make_slug(
        schedule,
        item,
        entity.first("country"),
        entity_name,
        strict=False,
    )

    sanction = h.make_sanction(context, entity)
    sanction.add("program", program)
    sanction.add("reason", schedule)
    sanction.add("authorityId", item)

    names = node.findtext("./Aliases")
    if names is not None:
        for name in names.split(", "):
            name = collapse_spaces(name)
            entity.add("alias", name)

    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    context.emit(sanction)
Exemplo n.º 15
0
def crawl(context: Context):
    """Emit one ``CryptoWallet`` per entry of the source JSON ``result`` list.

    Consumed keys are popped from each entry so that ``h.audit_data`` sees
    only the leftovers (``transactions`` is explicitly ignored).
    """
    path = context.fetch_resource("source.json", context.dataset.data.url)
    context.export_resource(path, JSON, title=context.SOURCE_TITLE)
    # JSON text is UTF-8; read it explicitly as such instead of relying on
    # the platform's locale-dependent default encoding.
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    for entry in data.get("result", []):
        wallet = context.make("CryptoWallet", target=True)
        wallet.id = context.make_slug(entry.get("address"))
        wallet.add("publicKey", entry.pop("address"))
        wallet.add("topics", "crime.theft")
        wallet.add("createdAt", entry.pop("createdAt"))
        wallet.add("modifiedAt", entry.pop("updatedAt"))
        wallet.add("alias", entry.pop("family"))
        wallet.add("balance", format_number(entry.pop("balance")))
        wallet.add("amountUsd", format_number(entry.pop("balanceUSD")))
        wallet.add("currency", entry.pop("blockchain"))
        h.audit_data(entry, ignore=["transactions"])
        context.emit(wallet)
Exemplo n.º 16
0
def crawl_physical(context: Context) -> None:
    """Emit one ``Person`` per row of the "physical" JSON resource.

    Address and sanction handling are delegated to ``handle_address`` and
    ``handle_sanction``; the latter receives the partially consumed row.
    """
    for row in json_resource(context, PHYSICAL_URL, "physical"):
        entity = context.make("Person")
        entity.id = context.make_slug(row.pop("ukaz_id"), row.pop("index"))
        for key in ("name_ukr", "name_original"):
            entity.add("name", row.pop(key, None))

        alt_names = row.pop("name_alternative", None)
        for alias in multi_split(alt_names, [";", "/"]):
            entity.add("alias", alias)
        entity.add("notes", row.pop("additional", None))

        citizenship = row.pop("citizenship", None)
        for country in multi_split(citizenship, [", "]):
            entity.add("nationality", country)

        entity.add("birthDate", row.pop("birthdate", None))
        entity.add("birthPlace", row.pop("birthplace", None))
        entity.add("position", row.pop("occupation", None))
        handle_address(context, entity, row.pop("livingplace", None))
        handle_sanction(context, entity, row)

        context.emit(entity, target=True)
Exemplo n.º 17
0
def crawl_legal(context: Context) -> None:
    """Emit one ``Organization`` per row of the "legal" JSON resource.

    Address and sanction handling are delegated to ``handle_address`` and
    ``handle_sanction``; the latter receives the partially consumed row.
    """
    for row in json_resource(context, LEGAL_URL, "legal"):
        entity = context.make("Organization")
        entity.id = context.make_slug(row.pop("ukaz_id"), row.pop("index"))
        for key in ("name_ukr", "name_original"):
            entity.add("name", row.pop(key, None))

        alt_names = row.pop("name_alternative", None)
        for alias in multi_split(alt_names, [";", "/"]):
            entity.add("alias", alias)
        entity.add("notes", row.pop("additional", None))

        # Strip the label text from the identifier fields; a missing or
        # empty field is treated as an empty string.
        tax_number = (row.pop("ipn", "") or "").replace("ІПН", "")
        entity.add("taxNumber", tax_number)
        reg_number = (row.pop("odrn_edrpou", "") or "").replace("ОДРН", "")
        entity.add("registrationNumber", reg_number)

        for key in ("place", "place_alternative"):
            handle_address(context, entity, row.pop(key, None))
        handle_sanction(context, entity, row)
        context.emit(entity, target=True)
Exemplo n.º 18
0
def crawl(context: Context):
    """Emit one ``Organization`` per ``entry`` element of the source XML.

    Namespaces are removed before the document is queried. The entry title
    is the name, the summary (unless absent or "N/A") is a comma-separated
    alias list, and every entity is tagged ``crime.terror``.
    """
    path = context.fetch_resource("source.xml", context.dataset.data.url)
    context.export_resource(path, XML, title=context.SOURCE_TITLE)
    doc = context.parse_resource_xml(path)
    doc = h.remove_namespace(doc)
    for node in doc.findall("./entry"):
        entity = context.make("Organization")
        name = node.findtext("./title")
        entity.id = context.make_slug(node.findtext("./id"), name)
        entity.add("name", name)

        # ``find`` returns None for an entry without a <link>; skip the
        # source URL in that case rather than failing on ``None.get``.
        link = node.find("./link")
        if link is not None:
            entity.add("sourceUrl", link.get("href"))

        # ``findtext`` returns None for an entry without a <summary>;
        # treat that like "N/A" (no aliases) instead of crashing on
        # ``None.split``.
        aliases = node.findtext("./summary")
        if aliases is not None and aliases != "N/A":
            entity.add("alias", aliases.split(", "))

        entity.add("notes", node.findtext("./content"))
        entity.add("createdAt", node.findtext("./published"))
        entity.add("modifiedAt", node.findtext("./updated"))
        entity.add("topics", "crime.terror")

        context.emit(entity, target=True)
Exemplo n.º 19
0
def parse_common(context: Context, entity, node):
    """Fill ``entity`` from the fields shared by all XML record types.

    Sets the entity ID, names, alias, notes and dates, and builds the
    matching sanction. Nothing is emitted here; the sanction is returned
    for the caller to finish.
    """
    entity.id = context.make_slug(node.findtext("./DATAID"))
    name_parts = {
        "given_name": node.findtext("./FIRST_NAME"),
        "second_name": node.findtext("./SECOND_NAME"),
        "name3": node.findtext("./THIRD_NAME"),
        "name4": node.findtext("./FOURTH_NAME"),
    }
    h.apply_name(entity, quiet=True, **name_parts)
    entity.add("alias", node.findtext("./NAME_ORIGINAL_SCRIPT"))
    entity.add("notes", h.clean_note(node.findtext("./COMMENTS1")))
    entity.add("topics", "sanction")

    sanction = h.make_sanction(context, entity)

    # The listing date serves as creation, listing and start date alike.
    listed_on = node.findtext("./LISTED_ON")
    entity.add("createdAt", listed_on)
    for prop in ("listingDate", "startDate"):
        sanction.add(prop, listed_on)

    # ``values`` is called once per target, as in the original flow.
    updated_node = node.find("./LAST_DAY_UPDATED")
    sanction.add("modifiedAt", values(updated_node))
    entity.add("modifiedAt", values(updated_node))

    sanction.add("program", node.findtext("./UN_LIST_TYPE"))
    sanction.add("unscId", node.findtext("./REFERENCE_NUMBER"))
    return sanction
Exemplo n.º 20
0
def crawl_country(context: Context, params, path, country):
    """Emit a PEP ``Person`` for every name listed on one country page.

    The page JSON is fetched from ``DATA_URL % path``; the HTML held in its
    first content block is walked element by element. ``h3`` elements set
    the current function (position), ``h2`` elements and a leading ``p``
    are skipped, and every other non-empty element is taken as a person.

    Args:
        context: crawler context used for fetching, logging and emitting.
        params: query parameters passed through to ``fetch_json``.
        path: path fragment interpolated into ``UI_URL`` and ``DATA_URL``.
        country: value stored as each person's country.
    """
    source_url = UI_URL % path
    context.log.debug("Crawling country: %s" % country)
    res = context.fetch_json(DATA_URL % path, params=params)
    data = res.get("result", {}).get("data", {}).get("page", {})
    # Fall back to a single empty block when "blocks" is missing *or* empty,
    # so that indexing [0] cannot raise IndexError.
    blocks = data.get("acf", {}).get("blocks") or [{}]
    # The default must be a dict: the previous list default had no ``.get``
    # and raised AttributeError whenever the key was absent.
    content = blocks[0].get("free_form_content", {}).get("content")
    if not content:
        context.log.warning(
            "No content for country",
            country=country,
            url=source_url,
        )
        return
    doc = html.fromstring(content)
    function = None
    for i, el in enumerate(doc.getchildren()):
        text = el.text_content().strip()
        if el.tag == "h2":
            continue
        if el.tag == "h3":
            function = text
            continue
        if i == 0 and el.tag == "p":
            # this paragraph at the start is a note, not a person
            continue
        name = text.replace("(Acting)", "")
        if is_empty(name):
            continue
        context.log.debug(
            "Person",
            country=country,
            name=name,
            function=function,
            url=source_url,
        )
        person = context.make("Person")
        person.id = context.make_slug(country, name, function)
        person.add("name", name)
        person.add("country", country)
        person.add("position", function)
        person.add("sourceUrl", source_url)
        person.add("topics", "role.pep")
        context.emit(person, target=True)
Exemplo n.º 21
0
def crawl_entity(context: Context, data):
    """Emit the sanctioned entity described by one registry record.

    The schema is chosen from the record's ``Nature`` via ``SCHEMATA``;
    records of an unknown nature are logged as errors and skipped. Every
    value of every ``RegistreDetail`` field is handed to ``apply_prop``,
    which receives both the entity and its sanction.
    """
    nature = data.pop("Nature")
    schema = SCHEMATA.get(nature)
    if schema is None:
        context.log.error("Unknown entity type", nature=nature)
        return
    entity = context.make(schema)
    entity.id = context.make_slug(data.pop("IdRegistre"))
    sanction = h.make_sanction(context, entity)
    for detail in data.pop("RegistreDetail"):
        field = detail.pop("TypeChamp")
        for value in detail.pop("Valeur"):
            apply_prop(context, entity, sanction, field, value)

    # The record's "Nom" is the tail of the name; a first name may already
    # have been set on the entity while applying the detail fields.
    name = data.pop("Nom")
    h.apply_name(
        entity,
        first_name=entity.first("firstName", quiet=True),
        tail_name=name,
        quiet=True,
    )
    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    # The sanction was built and handed to apply_prop above but never
    # emitted, so anything recorded on it was discarded; emit it alongside
    # the entity.
    context.emit(sanction)
Exemplo n.º 22
0
def parse_entry(context: Context, target, programs, places, updated_at):
    """Convert one sanctions target element into an entity, a sanction and relations.

    Args:
        context: crawler context used to create, look up, log and emit entities.
        target: XML element expected to contain an ``entity``, ``individual``
            or ``object`` child, plus ``modification`` children.
        programs: mapping from ``sanctions-set-id`` to the program value that
            is stored on the sanction.
        places: passed through unchanged to ``parse_identity``.
        updated_at: not used by this function; kept for interface
            compatibility with callers.
    """
    entity = context.make("LegalEntity")
    node = target.find("./entity")
    if node is None:
        node = target.find("./individual")
        entity = context.make("Person")
    if node is None:
        node = target.find("./object")
        if node is None:
            # None of the three known child elements is present; there is
            # nothing to read attributes from, so skip instead of crashing.
            context.log.warning("Target has no known subject node", target=target)
            return
        object_type = node.get("object-type")
        if object_type != "vessel":
            context.log.warning("Unknown target type",
                                target=target,
                                object_type=object_type)
        entity = context.make("Vessel")

    entity.id = context.make_slug(target.get("ssid"))
    entity.add("gender", node.get("sex"), quiet=True)
    for other in node.findall("./other-information"):
        # An empty element has text None; treat it like an empty string.
        value = (other.text or "").strip()
        if not value:
            continue
        is_imo = entity.schema.is_a("Vessel") and value.lower().startswith("imo")
        if is_imo and ":" in value:
            _, imo_num = value.split(":", 1)
            entity.add("imoNumber", imo_num)
        else:
            # Also covers IMO-looking lines without a colon, which cannot be
            # split into label and number.
            entity.add("notes", h.clean_note(value))

    sanction = h.make_sanction(context, entity)
    dates = set()
    for mod in target.findall("./modification"):
        dates.add(mod.get("publication-date"))
        sanction.add("listingDate", mod.get("publication-date"))
        sanction.add("startDate", mod.get("effective-date"))
    # Earliest and latest publication dates become created/modified stamps.
    known_dates = [d for d in dates if d is not None]
    if known_dates:
        entity.add("createdAt", min(known_dates))
        entity.add("modifiedAt", max(known_dates))

    ssid = target.get("sanctions-set-id")
    sanction.add("program", programs.get(ssid))

    for justification in node.findall("./justification"):
        # TODO: should this go into sanction:reason?
        entity.add("notes", h.clean_note(justification.text))

    for relation in node.findall("./relation"):
        rel_type = relation.get("relation-type")
        target_id = context.make_slug(relation.get("target-id"))
        res = context.lookup("relations", rel_type)
        if res is None:
            context.log.warning(
                "Unknown relationship type",
                type=rel_type,
                source=entity,
                target=target_id,
            )
            continue

        rel = context.make(res.schema)
        rel.id = context.make_slug(relation.get("ssid"))
        rel.add(res.source, entity.id)
        rel.add(res.target, target_id)
        rel.add(res.text, rel_type)

        # The relation's source property constrains the subject's schema.
        entity.add_schema(rel.schema.get(res.source).range)
        context.emit(rel)

    for identity in node.findall("./identity"):
        parse_identity(context, entity, identity, places)

    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    context.emit(sanction)
Exemplo n.º 23
0
def crawl_item(context: Context, listing):
    """Emit a disqualified person with sanctions, companies and directorships.

    Args:
        context: crawler context used to create and emit entities.
        listing: search-result dict for one officer; ``links.self`` is joined
            with ``API_URL`` to fetch the detail record and with ``WEB_URL``
            to form the source URL.

    For every entry in the detail record's ``disqualifications`` a sanction is
    emitted, and for every name in its ``company_names`` a company and a
    directorship linking it to the person.
    """
    links = listing.get("links", {})
    url = urljoin(API_URL, links.get("self"))
    data = http_get(context, url, cache_days=14)
    person = context.make("Person")
    # The last path segment of the API URL identifies the officer.
    _, officer_id = url.rsplit("/", 1)
    person.id = context.make_slug(officer_id)

    person.add("name", listing.get("title"))
    person.add("notes", listing.get("description"))
    person.add("topics", "crime")
    source_url = urljoin(WEB_URL, links.get("self"))
    person.add("sourceUrl", source_url)

    person.add("lastName", data.pop("surname", None))
    person.add("firstName", data.pop("forename", None))
    person.add("middleName", data.pop("other_forenames", None))
    person.add("title", data.pop("title", None))

    nationality = data.pop("nationality", None)
    if nationality is not None:
        person.add("nationality", nationality.split(","))
    person.add("birthDate", data.pop("date_of_birth", None))

    listing_address = listing.get("address", {})
    person_address = h.make_address(
        context,
        full=listing.get("address_snippet"),
        street=listing_address.get("address_line_1"),
        street2=listing_address.get("premises"),
        city=listing_address.get("locality"),
        postal_code=listing_address.get("postal_code"),
        region=listing_address.get("region"),
        # country_code=person.first("nationality"),
    )
    h.apply_address(context, person, person_address)

    for disqual in data.pop("disqualifications", []):
        case_id = disqual.get("case_identifier")
        sanction = h.make_sanction(context, person, key=case_id)
        sanction.add("recordId", case_id)
        sanction.add("startDate", disqual.get("disqualified_from"))
        sanction.add("endDate", disqual.get("disqualified_until"))
        sanction.add("listingDate", disqual.get("undertaken_on"))
        for key, value in disqual.get("reason", {}).items():
            value = value.replace("-", " ")
            reason = f"{key}: {value}"
            sanction.add("reason", reason)
        sanction.add("country", "gb")
        context.emit(sanction)

        disqual_address = disqual.get("address", {})
        # NOTE(review): `full` is taken from the person's listing snippet, not
        # from the disqualification's own address — confirm this is intended.
        company_address = h.make_address(
            context,
            full=listing.get("address_snippet"),
            street=disqual_address.get("address_line_1"),
            street2=disqual_address.get("premises"),
            city=disqual_address.get("locality"),
            postal_code=disqual_address.get("postal_code"),
            region=disqual_address.get("region"),
            # country_code=person.first("nationality"),
        )

        for company_name in disqual.get("company_names", []):
            company = context.make("Company")
            company.id = context.make_slug("named", company_name)
            company.add("name", company_name)
            company.add("jurisdiction", "gb")
            # Apply the address before emitting, as is done for the person,
            # so the emitted company actually carries it.
            h.apply_address(context, company, company_address)
            context.emit(company)

            directorship = context.make("Directorship")
            directorship.id = context.make_id(person.id, company.id)
            directorship.add("director", person)
            directorship.add("organization", company)
            context.emit(directorship)

    context.emit(person, target=True)
Exemplo n.º 24
0
def company_id(context: Context, id: str):
    """Return the slug that ``context`` builds from "company" and the given ID."""
    slug = context.make_slug("company", id)
    return slug
Exemplo n.º 25
0
def short_id(context: Context, id: str) -> str:
    """Abbreviate an ID by replacing the person/company slugs with "p"/"c".

    Every occurrence of ``context.make_slug("person")`` becomes ``"p"`` and,
    after that, every occurrence of ``context.make_slug("company")`` becomes
    ``"c"``.
    """
    abbreviations = (("person", "p"), ("company", "c"))
    for kind, letter in abbreviations:
        id = id.replace(context.make_slug(kind), letter)
    return id
Exemplo n.º 26
0
def crawl_person(context: Context, data: Dict[str, Any]):
    """Emit a Person for one source record, plus related persons and their links.

    Args:
        context: crawler context used to create, look up, log and emit entities.
        data: mutable record dict; consumed keys are popped from it.

    The person is emitted as a target only when the record's ``is_pep`` flag
    is truthy. Related persons are emitted together with a relation entity
    whose schema and properties come from the ``person_relations`` lookup.
    """
    is_pep = data.pop("is_pep", False)
    entity = context.make("Person", target=is_pep)
    wikidata_id = clean_wdid(data.pop("wikidata_id", None))
    entity.id = person_id(context, data.pop("id"), wikidata_id)
    entity.add("sourceUrl", data.pop("url_en", None))
    data.pop("url_ru", None)
    entity.add("modifiedAt", data.pop("last_change", None))
    entity.add("wikidataId", wikidata_id)

    # (property, source key) pairs that are copied over verbatim, in order.
    name_fields = (
        ("name", "full_name_en"),
        ("name", "full_name_ru"),
        ("alias", "inversed_full_name_en"),
        ("alias", "inversed_full_name_ru"),
        ("alias", "also_known_as_en"),
        ("alias", "also_known_as_ru"),
    )
    for prop, key in name_fields:
        entity.add(prop, data.pop(key, None))
    entity.add("alias", split_names(data.pop("names", [])))

    date_fields = (
        ("birthDate", "date_of_birth"),
        ("deathDate", "termination_date_human"),
    )
    for prop, key in date_fields:
        entity.add(prop, parse_date(data.pop(key, None)))

    detail_fields = (
        ("birthPlace", "city_of_birth_ru"),
        ("birthPlace", "city_of_birth_en"),
        ("innCode", "inn"),
        ("firstName", "first_name_en"),
        ("firstName", "first_name_ru"),
        ("fatherName", "patronymic_en"),
        ("fatherName", "patronymic_ru"),
        ("lastName", "last_name_en"),
        ("lastName", "last_name_ru"),
    )
    for prop, key in detail_fields:
        entity.add(prop, data.pop(key, None))

    # The last job exists in an unsuffixed, an English and a Russian variant.
    for suffix in ("", "_en", "_ru"):
        role = data.pop(f"last_job_title{suffix}", None)
        org = data.pop(f"last_workplace{suffix}", None)
        if org is not None and org.strip():
            has_role = role is not None and role.strip()
            label = f"{org} ({role})" if has_role else org
            entity.add("position", label)

    for country_data in data.pop("related_countries", []):
        rel_type = country_data.pop("relationship_type")
        country_name = country_data.pop("to_country_en", None)
        # The Russian name is only consulted when no English one is given.
        country_name = country_name or country_data.pop("to_country_ru")
        link = context.lookup("country_links", rel_type)
        if link is None:
            context.log.warn(
                "Unknown country link",
                rel_type=rel_type,
                entity=entity,
                country=country_name,
            )
        elif link.prop is not None:
            entity.add(link.prop, country_name)

    for rel_data in data.pop("related_persons", []):
        other_pep = rel_data.pop("is_pep", False)
        other_wdid = clean_wdid(rel_data.pop("person_wikidata_id"))
        other = context.make("Person", target=other_pep)
        other.id = person_id(context, rel_data.pop("person_id"), other_wdid)
        for name_key in ("person_en", "person_ru"):
            other.add("name", rel_data.pop(name_key, None))
        other.add("wikidataId", other_wdid)

        rel_type_en = rel_data.pop("relationship_type_en", None)
        rel_type_ru = rel_data.pop("relationship_type_ru", None)
        rel_type = rel_type_en or rel_type_ru
        res = context.lookup("person_relations", rel_type)
        if res is None:
            context.log.warn(
                "Unknown person/person relation type",
                rel_type=rel_type,
                entity=entity,
                other=other,
            )
            continue

        # Sorting the two IDs makes the relation ID independent of which of
        # the two records is being crawled.
        first_id, second_id = sorted([entity.id, other.id])
        rel = context.make(res.schema)
        rel.id = context.make_slug(
            short_id(context, first_id),
            res.schema,
            short_id(context, second_id),
        )
        rel.add(res.from_prop, first_id)
        rel.add(res.to_prop, second_id)
        rel.add(res.desc_prop, rel_type)
        rel.add("modifiedAt", parse_date(rel_data.pop("date_confirmed")))
        rel.add("startDate", parse_date(rel_data.pop("date_established")))
        rel.add("endDate", parse_date(rel_data.pop("date_finished")))

        context.emit(other, target=other_pep)
        context.emit(rel)

    data.pop("type_of_official_ru", None)
    person_type = data.pop("type_of_official_en", None)
    person_topic = context.lookup_value("person_type", person_type)
    if person_topic is None:
        context.log.warn("Unknown type of official", type=person_type)
    entity.add("topics", person_topic)
    if is_pep:
        entity.add("topics", "role.pep")
    entity.add("status", person_type)

    # Fields that are deliberately dropped.
    # TODO: store images ("photo")
    ignored_keys = (
        "died",
        "tags",
        "reason_of_termination_en",
        "reason_of_termination_ru",
        "photo",
        "related_companies",
        "declarations",
    )
    for key in ignored_keys:
        data.pop(key, None)
    context.emit(entity, target=is_pep)
Exemplo n.º 27
0
def parse_relation(context: Context, el, parties):
    """Emit a relation entity linking two already-parsed parties.

    Args:
        context: crawler context used to create, log and emit entities.
        el: relation element; its ``RelationTypeID``, ``From-ProfileID``,
            ``To-ProfileID`` and ``ID`` attributes and ``Comment`` child are read.
        parties: mapping from party slug to the party entity.

    Returns early, with a warning, when either party is missing from
    ``parties`` or when the relation type has no ``relations`` lookup entry.
    """
    type_id = el.get("RelationTypeID")
    type_ = ref_value("RelationType", type_id)

    # Resolve both endpoints in order; bail out on the first one missing.
    endpoints = (
        ("From-ProfileID", "Missing relation 'from' party"),
        ("To-ProfileID", "Missing relation 'to' party"),
    )
    resolved = []
    for attribute, missing_message in endpoints:
        party_id = context.make_slug(el.get(attribute))
        party = parties.get(party_id)
        if party is None:
            context.log.warn(missing_message, entity_id=party_id, type=type_)
            return
        resolved.append(party)
    from_party, to_party = resolved

    relation = lookup("relations", type_id)
    if relation is None:
        context.log.warn(
            "Unknown relation type",
            type_id=type_id,
            type_value=type_,
            from_party=from_party,
            to_party=to_party,
        )
        return

    entity = context.make(relation.schema)
    from_range = entity.schema.get(relation.from_prop).range
    to_range = entity.schema.get(relation.to_prop).range

    # HACK: Looks like OFAC just has some link in a direction that makes no
    # semantic sense, so we're flipping them here.
    wrong_direction = disjoint_schema(from_party, from_range) or disjoint_schema(
        to_party, to_range
    )
    if wrong_direction:
        from_party, to_party = to_party, from_party

    for party, range_ in ((from_party, from_range), (to_party, to_range)):
        add_schema(party, range_)
    for party in (from_party, to_party):
        context.emit(party, target=True)

    entity.id = context.make_id("Relation", from_party.id, to_party.id, el.get("ID"))
    entity.add(relation.from_prop, from_party)
    entity.add(relation.to_prop, to_party)
    entity.add(relation.description_prop, type_)
    entity.add("summary", el.findtext("./Comment"))
    context.emit(entity)
    context.log.debug("Relation", from_=from_party, type=type_, to=to_party)
Exemplo n.º 28
0
def crawl_person(context: Context, name, url):
    """Fetch a member's profile page and emit the person with their details.

    Args:
        context: crawler context used to fetch HTML and create/emit entities.
        name: display name; when it contains ", " the part before is stored
            as last name and the part after as first name.
        url: profile page URL; its last path segment becomes the person ID.

    Each ``li`` inside the ``regular-details`` block is dispatched on the text
    (or, if empty, the CSS class) of its first child element.
    """
    context.log.debug("Crawling member", name=name, url=url)
    doc = context.fetch_html(url)
    _, person_id = url.rsplit("/", 1)
    person = context.make("Person")
    person.id = context.make_slug(person_id)
    person.add("sourceUrl", url)
    person.add("name", name)
    person.add("topics", "role.pep")

    # Names without the ", " separator cannot be split into parts; keep only
    # the full name for those instead of failing on the unpack.
    if ", " in name:
        last_name, first_name = name.split(", ", 1)
        person.add("firstName", first_name)
        person.add("lastName", last_name)

    address = {}
    details = doc.find('.//div[@class="regular-details"]')
    for row in details.findall('.//ul[@class="no-bullet"]/li'):
        children = row.getchildren()
        title = children[0]
        title_text = collapse_spaces(stringify(title.text_content()))
        # Icon-only rows have no text; fall back to the element's class.
        title_text = title_text or title.get("class")
        value = collapse_spaces(title.tail)
        if title_text in ("Full name:", "Address:",
                          "Declaration of interests"):
            # ignore these.
            continue
        if title_text == "Emails:":
            emails = [e.text for e in row.findall(".//a")]
            person.add("email", emails)
            continue
        if "glyphicon-phone" in title_text:
            person.add("phone", value.split(","))
            continue
        if "fa-fax" in title_text:
            # TODO: yeah, no
            # person.add("phone", value)
            continue
        if title_text in ("Web sites:", "list-inline"):
            sites = [e.get("href") for e in row.findall(".//a")]
            person.add("website", sites)
            continue
        if title_text == "Represented Country:":
            person.add("country", value)
            continue
        if title_text == "Languages:":
            # TODO: missing in FtM
            # person.add("languages", value.split(','))
            continue
        if "Regions since:" in title_text:
            date = h.parse_date(value, FORMATS)
            person.add("createdAt", date)
            continue
        if "Date of birth:" in title_text:
            person.add("birthDate", h.parse_date(value, FORMATS))
            continue
        if "Commissions:" in title_text:
            for com in row.findall(".//li"):
                text = collapse_spaces(com.text_content())
                sep = "Mandate - "
                if sep in text:
                    _, text = text.split(sep, 1)
                person.add("sector", text)
            continue
        if "Areas of interest:" in title_text:
            for area in row.findall(".//li"):
                person.add("keywords", area.text_content())
            continue
        if title.tag == "i" and value is None:
            person.add("position", title_text)
            continue
        if title_text in ("Street:", "Postal code:", "City:", "Country:"):
            # Exact comparison: `in ("Country:")` was a substring test on a
            # plain string, not membership in a tuple.
            if title_text == "Country:":
                person.add("country", value)
            address[title_text.replace(":", "")] = value
            continue
        if title_text == "Political group:":
            group = context.make("Organization")
            group.add("name", value)
            slug = value
            # Use the text after the last "(" as the slug when present.
            if "(" in slug:
                _, slug = slug.rsplit("(", 1)
            slug = slugify(slug, sep="-")
            group.id = f"eu-cor-group-{slug}"
            context.emit(group)
            member = context.make("Membership")
            member.id = context.make_id("Membership", person.id, group.id)
            member.add("member", person)
            member.add("organization", group)
            context.emit(member)
            continue

    address = h.make_address(
        context,
        street=address.get("Street"),
        city=address.get("City"),
        # Key must match what the loop stores ("Postal code:" minus colon);
        # the previous "Posal code" never matched.
        postal_code=address.get("Postal code"),
        country=address.get("Country"),
    )
    h.apply_address(context, person, address)
    context.emit(person, target=True)
Exemplo n.º 29
0
def crawl(context: Context):
    """Page through the JSON listing and emit each entity with its sanction.

    Args:
        context: crawler context providing the dataset URL, HTTP client,
            lookups, logging and entity creation/emission.

    Pages are requested by substituting the page number into the dataset URL.
    Crawling stops when a page yields no rows or contains the row with ID 1.
    """
    base_url = str(context.dataset.data.url)
    headers = {
        "Accept":
        "application/json",
        "Referer":
        "https://www.iadb.org/en/transparency/sanctioned-firms-and-individuals",
    }
    for page in count(1):
        url = base_url.replace("pPageNumber=1", f"pPageNumber={page}")
        res = context.http.get(url, headers=headers)
        ids = []
        for row in res.json():
            # Normalise the source's "N/A" placeholder to an empty value.
            for field, value in list(row.items()):
                if value == "N/A":
                    row[field] = ""
            row_id = row.pop("id")
            ids.append(row_id)
            entity_type = row.pop("entity")
            schema = context.lookup_value("types", entity_type)
            if schema is None:
                context.log.warning("Unknown entity type", entity=entity_type)
                continue
            entity = context.make(schema)
            entity.id = context.make_slug(row_id)
            entity.add("name", row.pop("firmName"))
            entity.add("topics", "debarment")
            entity.add("alias", row.pop("additionalName"))
            entity.add("notes", row.pop("title"))
            entity.add("notes", row.pop("additionalTitle"))
            entity.add("country", parse_countries(row.pop("country")))

            # Companies get a jurisdiction, everything else a nationality.
            nat = "jurisdiction" if schema == "Company" else "nationality"
            entity.add(nat, parse_countries(row.pop("nationality")))

            affiliated = row.pop("affiliatedWithEntityId")
            if affiliated:
                link = context.make("UnknownLink")
                link.id = context.make_id(row_id, affiliated)
                link.add("subject", entity.id)
                link.add("object", context.make_slug(affiliated))
                context.emit(link)

            sanction = h.make_sanction(context, entity)
            sanction.add("status", row.pop("statusName"))
            sanction.add("reason", row.pop("grounds"))
            sanction.add("authority", row.pop("source"))
            sanction.add("authority", row.pop("idBinstSource"))
            sanction.add("program", row.pop("idBinstType"))
            sanction.add("startDate", h.parse_date(row.pop("datefrom"),
                                                   FORMATS))
            sanction.add("endDate", h.parse_date(row.pop("dateto"), FORMATS))

            context.emit(sanction)
            context.emit(entity, target=True)

        # An empty page means there is nothing further to fetch; without this
        # check min() raises ValueError on the empty list.
        if not ids or min(ids) == 1:
            return
Exemplo n.º 30
0
def crawl(context: Context):
    """Parse the single HTML table of listed subjects and emit entity + sanction.

    Args:
        context: crawler context used to fetch/export the source file and to
            create and emit entities.

    The second table cell holds a free-text description. Labelled fragments
    are peeled off its end one label at a time with ``maybe_rsplit`` (which
    presumably returns the remaining text and the split-off value — confirm
    against its definition); whatever is left is split on ", " into names.
    The order of the ``maybe_rsplit`` calls therefore matters.
    """
    path = context.fetch_resource("source.html", context.dataset.data.url)
    context.export_resource(path, HTML, title=context.SOURCE_TITLE)
    with open(path, "r") as fh:
        doc = html.parse(fh)
    tables = doc.findall(".//table")
    assert len(tables) == 1
    rows = tables[0].findall(".//tr")
    # The first two rows are skipped (presumably headers).
    for row in rows[2:]:
        cells = [
            collapse_spaces(c.text_content()) for c in row.findall("./td")
        ]
        index = cells[0]
        body = cells[1]
        decision = cells[2]
        un_id = cells[3]
        listing_date = cells[4]
        entity = context.make("Thing")
        entity.id = context.make_slug(index, un_id)
        entity.add("notes", cells[5])

        sanction = h.make_sanction(context, entity)
        sanction.add("listingDate", clean_date(listing_date))
        sanction.add("program", decision)
        sanction.add("recordId", un_id)

        body, gender = maybe_rsplit(body, "пол:")
        entity.add_cast("Person", "gender", h.clean_gender(gender))
        body, gender = maybe_rsplit(body, "Пол:")
        entity.add_cast("Person", "gender", h.clean_gender(gender))
        body, location = maybe_rsplit(body, "местонахождение:")
        entity.add_cast("LegalEntity", "country", location)
        # IMO numbers are stripped from the body but not stored.
        body, _ = maybe_rsplit(body, "Присвоенный ИМО номер компании:")
        body, _ = maybe_rsplit(body, "Номер ИМО:")
        body, emails = maybe_rsplit(body, "Адрес эл. почты:")
        for email in letter_split(emails):
            entity.add_cast("LegalEntity", "email", email)
        # Fax numbers are stripped from the body but not stored.
        body, _ = maybe_rsplit(body, "Номер факса:")
        body, _ = maybe_rsplit(body, "Факс:")
        body, phones = maybe_rsplit(body, "Номера телефонов:")
        for phone in letter_split(phones):
            entity.add_cast("LegalEntity", "phone", phone)
        body, phones = maybe_rsplit(body, "Тел.:")
        for phone in letter_split(phones):
            entity.add_cast("LegalEntity", "phone", phone)
        body, swift = maybe_rsplit(body, "СВИФТ-код:")
        entity.add_cast("LegalEntity", "swiftBic", swift)
        body, swift = maybe_rsplit(body, "СВИФТ/БИК-код:")
        entity.add_cast("LegalEntity", "swiftBic", swift)

        body, other_info = maybe_rsplit(body, "Прочая информация:")
        entity.add_cast("Thing", "notes", other_info)
        # The in-body listing date is dropped; the table column is used above.
        body, _ = maybe_rsplit(body, "Дата внесения в перечень:")
        body, addresses = maybe_rsplit(body, "Адрес:")
        for address in letter_split(addresses):
            country = address
            if ", " in country:
                # Only the trailing component is the country candidate;
                # assigning the whole rsplit() result passed a list on.
                _, country = address.rsplit(", ", 1)
            code = registry.country.clean(country, fuzzy=True)
            obj = h.make_address(context, full=address, country_code=code)
            h.apply_address(context, entity, obj)
            entity.add("country", code)
        body, national_ids = maybe_rsplit(
            body, "Национальный идентификационный номер:")
        for national_id in letter_split(national_ids):
            entity.add_cast("LegalEntity", "idNumber", national_id)
        body, passport_nos = maybe_rsplit(body, "Паспорт №:")
        for passport_no in letter_split(passport_nos):
            entity.add_cast("Person", "passportNumber", passport_no)
        body, citizenship = maybe_rsplit(body, "Гражданство:")
        entity.add_cast("Person", "nationality", citizenship)
        aka = "На основании менее достоверных источников также известен как:"
        body, aka = maybe_rsplit(body, aka)
        entity.add("alias", letter_split(aka))
        strong_aka = "На основании достоверных источников также известен как:"
        body, strong_aka = maybe_rsplit(body, strong_aka)
        entity.add("alias", letter_split(strong_aka))
        # This value is stripped from the body but not stored.
        body, _ = maybe_rsplit(body, "Р.И.К.:")

        body, birth_place = maybe_rsplit(body, "Место рождения:")
        entity.add_cast("Person", "birthPlace", birth_place)
        body, birth_dates = maybe_rsplit(body, "Дата рождения:")
        for birth_date in letter_split(birth_dates):
            entity.add_cast("Person", "birthDate", clean_date(birth_date))
        body, position = maybe_rsplit(body, "Должность:")
        entity.add_cast("Person", "position", position)
        body, job = maybe_rsplit(body, "Обращение:")
        entity.add_cast("Person", "position", job)

        body, aliases = maybe_rsplit(body, "Другие названия:")
        entity.add("alias", letter_split(aliases))

        body, aliases = maybe_rsplit(body, "Вымышленные названия:")
        entity.add("alias", letter_split(aliases))

        names = body.split(", ")
        entity.add("name", names)

        # Nothing above refined the schema, so fall back to LegalEntity.
        if entity.schema.name == "Thing":
            entity.schema = model.get("LegalEntity")

        context.emit(entity, target=True)
        context.emit(sanction)