示例#1
0
def parse_alias(party, parts, alias):
    """Apply every documented name found below ``alias`` to ``party``.

    ``alias`` is an XML element. Its ``LowQuality`` attribute decides whether
    the names are applied as weak, and its ``AliasTypeID`` attribute selects
    the target name property through ``ref_value`` and ``ALIAS_TYPES``.
    ``parts`` maps ``NamePartGroupID`` values to name part labels such as
    "Entity Name" or "First Name". The ``Primary`` attribute is not used.
    """
    weak = as_bool(alias.get("LowQuality"))
    type_label = ref_value("AliasType", alias.get("AliasTypeID"))
    prop = ALIAS_TYPES[type_label]
    for documented in alias.findall("./DocumentedName"):
        # Values sharing a name part label are joined with single spaces.
        collected = defaultdict(str)
        for part in documented.findall("./DocumentedNamePart/NamePartValue"):
            label = parts.get(part.get("NamePartGroupID"))
            combined = " ".join((collected[label], part.text))
            collected[label] = combined.strip()

        entity_name = collected.pop("Entity Name", None)
        h.apply_name(party, full=entity_name, name_prop=prop, is_weak=weak)

        # Labels that map straight onto a single property.
        for target, label in (
            ("name", "Vessel Name"),
            ("weakAlias", "Nickname"),
            ("registrationNumber", "Aircraft Name"),
        ):
            party.add(target, collected.pop(label, None))

        first = collected.pop("First Name", None)
        middle = collected.pop("Middle Name", None)
        maiden = collected.pop("Maiden Name", None)
        last = collected.pop("Last Name", None)
        matronymic = collected.pop("Matronymic", None)
        patronymic = collected.pop("Patronymic", None)
        h.apply_name(
            party,
            first_name=first,
            middle_name=middle,
            maiden_name=maiden,
            last_name=last,
            matronymic=matronymic,
            patronymic=patronymic,
            is_weak=weak,
            name_prop=prop,
        )
        # Whatever is left over was not consumed above.
        h.audit_data(collected)
示例#2
0
def parse_name(entity, node):
    """Apply the name parts of a name element to ``entity``.

    The ``name-type`` and ``quality`` attributes of ``node`` select the name
    property (``NAME_TYPE``) and the weak flag (``NAME_QUALITY_WEAK``). The
    primary values and each (lang, script) spelling variant are applied as
    separate sets of name parts.
    """
    prop = NAME_TYPE[node.get("name-type")]
    weak = NAME_QUALITY_WEAK[node.get("quality")]

    # Key None holds the primary values; (lang, script) tuples hold the
    # spelling variants of the same part types.
    variants = defaultdict(dict)
    for part in node.findall("./name-part"):
        kind = part.get("name-part-type")
        variants[None][kind] = part.findtext("./value")

        for spelling in part.findall("./spelling-variant"):
            variant_key = (spelling.get("lang"), spelling.get("script"))
            variants[variant_key][kind] = spelling.text

    simple_fields = (
        ("title", "title"),
        ("title", "suffix"),
        ("weakAlias", "other"),
        ("weakAlias", "tribal-name"),
        ("fatherName", "grand-father-name"),
    )
    for fields in variants.values():
        for target, source in simple_fields:
            entity.add(target, fields.pop(source, None), quiet=True)

        whole = fields.pop("whole-name", None)
        given = fields.pop("given-name", None)
        further_given = fields.pop("further-given-name", None)
        father = fields.pop("father-name", None)
        family = fields.pop("family-name", None)
        maiden = fields.pop("maiden-name", None)
        h.apply_name(
            entity,
            full=whole,
            given_name=given,
            second_name=further_given,
            patronymic=father,
            last_name=family,
            maiden_name=maiden,
            is_weak=weak,
            name_prop=prop,
            quiet=True,
        )
        # Report name part types that were not handled above.
        h.audit_data(fields)
示例#3
0
def crawl(context: Context):
    """Parse the HTML tables of the source page and emit entities and sanctions.

    Within each table the first row supplies the column headers (slugified),
    a row with a single cell switches the schema via ``TYPES``, and every
    other row describes one listed entity.
    """
    path = context.fetch_resource("source.html", context.dataset.data.url)
    context.export_resource(path, HTML, title=context.SOURCE_TITLE)
    with open(path, "r", encoding="utf-8") as fh:
        doc = html.fromstring(fh.read())

    for table in doc.findall('.//div[@class="editor-content"]//table'):
        headers = None
        schema = None
        for tr in table.findall(".//tr"):
            cells = []
            for td in tr.findall("./td"):
                cells.append(collapse_spaces(td.text_content()))

            if headers is None:
                headers = [slugify(cell, sep="_") for cell in cells]
                continue
            if len(cells) == 1:
                # Section row: names the schema for the rows that follow.
                schema = TYPES[cells[0]]
                continue
            record = dict(zip(headers, cells))

            entity = context.make(schema)
            full_name = record.pop("imie_i_nazwisko_nazwa_podmiotu")
            entity.id = context.make_slug(full_name)
            # Text before the first "(" is the name; each following
            # parenthesised chunk is treated as an alias.
            primary, *bracketed = full_name.split("(")
            entity.add("name", primary)
            for chunk in bracketed:
                entity.add("alias", chunk.split(")")[0])
            entity.add("notes", record.pop("uzasadnienie_wpisu_na_liste"))

            # Peel known trailing segments off the details text, one marker
            # at a time, in the order given by CHOPSKA.
            remainder = record.pop("dane_identyfikacyjne_osoby_podmiotu")
            for marker, target in CHOPSKA:
                pieces = remainder.rsplit(marker, 1)
                remainder = pieces[0]
                if len(pieces) <= 1:
                    continue
                if target == "address":
                    address = h.make_address(context, full=pieces[1])
                    h.apply_address(context, entity, address)
                else:
                    entity.add(target, pieces[1])

            if remainder.strip():
                found = context.lookup("details", remainder)
                if found is not None:
                    for name, value in found.props.items():
                        entity.add(name, value)
                else:
                    context.log.warning("Unhandled details", details=remainder)

            sanction = h.make_sanction(context, entity)
            sanction.add("provisions", record.pop("zastosowane_srodki_sankcyjne"))

            listed = record.pop("data_umieszczenia_na_liscie").replace(" r.", "")
            sanction.add("startDate", h.parse_date(listed, ["%d.%m.%Y"]))

            h.audit_data(record)
            context.emit(entity, target=True)
            context.emit(sanction)
示例#4
0
def parse_alias(entity: Entity, alias: Dict[str, str]):
    """Apply one alias record to ``entity``.

    The ``QUALITY`` value picks the name property through ``NAME_QUALITY``
    and ``ALIAS_NAME`` is applied as the full name. Any key left in ``alias``
    afterwards, except ``NOTE``, is passed to the audit.
    """
    quality = alias.pop("QUALITY", None)
    prop = NAME_QUALITY[quality]
    alias_name = alias.pop("ALIAS_NAME", None)
    h.apply_name(entity, full=alias_name, quiet=True, name_prop=prop)
    h.audit_data(alias, ignore=["NOTE"])
示例#5
0
def crawl(context: Context):
    """Fetch the JSON wallet listing and emit one CryptoWallet per entry.

    The downloaded document is expected to hold its entries under the
    ``result`` key; a missing key yields no entities. Every entry must carry
    the ``address``, ``createdAt``, ``updatedAt``, ``family``, ``balance``,
    ``balanceUSD`` and ``blockchain`` keys, otherwise ``KeyError`` is raised.
    """
    path = context.fetch_resource("source.json", context.dataset.data.url)
    context.export_resource(path, JSON, title=context.SOURCE_TITLE)
    # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
    # relying on the platform's locale default, which differs between hosts.
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    for entry in data.get("result", []):
        wallet = context.make("CryptoWallet", target=True)
        # The address is read once for the ID and then consumed as the key.
        wallet.id = context.make_slug(entry.get("address"))
        wallet.add("publicKey", entry.pop("address"))
        wallet.add("topics", "crime.theft")
        wallet.add("createdAt", entry.pop("createdAt"))
        wallet.add("modifiedAt", entry.pop("updatedAt"))
        wallet.add("alias", entry.pop("family"))
        wallet.add("balance", format_number(entry.pop("balance")))
        wallet.add("amountUsd", format_number(entry.pop("balanceUSD")))
        wallet.add("currency", entry.pop("blockchain"))
        # Anything not consumed above, apart from transactions, is audited.
        h.audit_data(entry, ignore=["transactions"])
        context.emit(wallet)
示例#6
0
def crawl_entities(context: Context):
    """Emit an Organization for every record fetched from "entidades".

    Birth-related fields of the record and of its aliases are mapped onto
    incorporation date and country. Addresses and aliases are delegated to
    ``parse_address`` and ``parse_alias``.
    """
    date_keys = ("DATE_OF_BIRTH", "YEAR")
    for data in fetch(context, "entidades"):
        entity = crawl_common(context, data, "entidades", "Organization")
        for key in date_keys:
            entity.add("incorporationDate", data.pop(key, None))
        # The city of birth is consumed but not stored on organizations.
        data.pop("CITY_OF_BIRTH", None)
        entity.add("country", data.pop("COUNTRY_OF_BIRTH", None))

        for raw_address in data.pop("ENTITY_ADDRESS", []):
            parsed = parse_address(context, raw_address)
            h.apply_address(context, entity, parsed)

        for alias in data.pop("ENTITY_ALIAS", []):
            for key in date_keys:
                entity.add("incorporationDate", alias.pop(key, None))
            # CITY_OF_BIRTH is deliberately left on the alias record here.
            entity.add("country", alias.pop("COUNTRY_OF_BIRTH", None))
            parse_alias(entity, alias)

        h.audit_data(data, ["VERSIONNUM"])
        context.emit(entity, target=True)
示例#7
0
def crawl_individuals(context: Context):
    """Emit a Person and a sanction for each row of the individuals sheet.

    Rows without ``internal_seq_id`` are skipped, as are rows for which
    ``context.make_id`` yields no ID from the three name columns.
    """
    path = context.fetch_resource("individuals.xlsx", PEOPLE_URL)
    context.export_resource(path, XLSX, title=context.SOURCE_TITLE)
    for record in excel_records(path):
        seq_id = record.pop("internal_seq_id", None)
        if seq_id is None:
            continue

        english = record.pop("name_of_individual_english", None)
        hebrew = record.pop("name_of_individual_hebrew", None)
        arabic = record.pop("name_of_individual_arabic", None)
        person = context.make("Person")
        person.id = context.make_id(english, hebrew, arabic)
        if person.id is None:
            continue

        # The first available name is primary; the Hebrew and Arabic
        # spellings are always added as aliases as well.
        person.add("name", english or hebrew or arabic)
        for spelling in (hebrew, arabic):
            person.add("alias", spelling)
        person.add("topics", "crime.terror")
        birth_date = parse_date(record.pop("d_o_b", None))
        person.add("birthDate", birth_date)
        person.add("nationality", record.pop("nationality_residency", None))
        person.add("idNumber", record.pop("individual_id", None))

        sanction = h.make_sanction(context, person)
        sanction.add("recordId", seq_id)
        sanction.add("recordId", record.pop("foreign_designation_id", None))
        for key in ("designation", "foreign_designation"):
            sanction.add("program", record.pop(key, None))
        sanction.add("authority", lang_pick(record, "designated_by"))

        # Called for their effect on the record only; the results are unused.
        lang_pick(record, "designated_by_abroad")
        record.pop("date_of_foreign_designation_date", None)

        designated = record.pop("date_of_designation_in_israel", None)
        parse_interval(sanction, designated)

        context.emit(person, target=True)
        context.emit(sanction)
        h.audit_data(record)
示例#8
0
def crawl_organizations(context: Context):
    """Emit Organizations, their sanctions and the links between them.

    Each row of the organizations sheet becomes an Organization plus a
    sanction. Rows for which ``context.make_id`` yields no ID are skipped.
    ``linked_to_internal_seq_id`` holds ";"-separated sequence IDs of related
    rows. Once all rows are read, an UnknownLink is emitted for every pair
    whose two sequence IDs were both seen.
    """
    path = context.fetch_resource("organizations.xlsx", ORG_URL)
    context.export_resource(path, XLSX, title=context.SOURCE_TITLE)
    seq_ids = {}  # internal sequence ID -> entity ID
    links = []  # (larger seq ID, smaller seq ID) pairs
    for record in excel_records(path):
        seq_id = record.pop("internal_seq_id", None)
        name_en = record.pop("organization_name_english", None)
        name_he = record.pop("organization_name_hebrew", None)
        entity = context.make("Organization")
        entity.id = context.make_id(name_en, name_he)
        if entity.id is None:
            continue
        if seq_id is not None:
            seq_ids[seq_id] = entity.id
        entity.add("name", name_en)
        entity.add("name", name_he)
        entity.add("topics", "crime.terror")
        entity.add("notes", h.clean_note(lang_pick(record, "comments")))
        entity.add("notes", h.clean_note(record.pop("column_42", None)))
        entity.add("email", record.pop("email", None))
        entity.add("country", record.pop("country_hebrew", None))
        entity.add("country", record.pop("country_english", None))
        entity.add("registrationNumber", record.pop("corporation_id", None))
        entity.add("legalForm", lang_pick(record, "corporation_type"))
        entity.add("jurisdiction", lang_pick(record, "location_of_formation"))
        date = parse_date(record.pop("date_of_corporation", None))
        entity.add("incorporationDate", date)
        # Iterate over a snapshot of the keys because matching columns are
        # popped from the record while looping.
        for field in list(record.keys()):
            if field.startswith("organization_name_"):
                entity.add("alias", record.pop(field, None))
            if field.startswith("telephone"):
                entity.add("phone", record.pop(field, None))
            if field.startswith("website"):
                entity.add("website", record.pop(field, None))

        entity.add("phone", record.pop("column_70", None))
        entity.add("website", record.pop("column_73", None))

        sanction = h.make_sanction(context, entity)
        sanction.add("recordId", seq_id)
        sanction.add("recordId", record.pop("seq_num_in_other_countries", None))
        sanction.add("program", record.pop("designation_type", None))
        sanction.add("reason", lang_pick(record, "designation_justification"))
        sanction.add("authority", lang_pick(record, "designated_by"))
        sanction.add("publisher", record.pop("public_records_references", None))

        # Called for their effect on the record only; the results are unused.
        lang_pick(record, "designated_by_abroad")
        record.pop("date_designated_in_other_countries", None)

        # A present-but-empty cell may be None, so fall back to "".
        linked = record.pop("linked_to_internal_seq_id", None) or ""
        # Without an own sequence ID the row cannot take part in a link, and
        # ordering a str against None would raise TypeError.
        if seq_id is not None:
            for link in linked.split(";"):
                if not link:
                    # "".split(";") yields [""]; there is nothing to link.
                    continue
                # Store the pair in a fixed order so that A->B and B->A
                # produce the same link ID below.
                links.append((max(link, seq_id), min(link, seq_id)))

        street = lang_pick(record, "street")
        city = lang_pick(record, "city_village")
        if street or city:
            address = h.make_address(
                context, street=street, city=city, country_code=entity.first("country")
            )
            h.apply_address(context, entity, address)

        for field in (
            "date_of_temporary_designation",
            "date_of_permenant_designation",
            "date_designation_in_west_bank",
        ):
            parse_interval(sanction, record.pop(field, None))

        context.emit(entity, target=True)
        context.emit(sanction)
        h.audit_data(record)

    for subject_seq, object_seq in links:
        subject_id = seq_ids.get(subject_seq)
        object_id = seq_ids.get(object_seq)
        # Skip links that point at rows which were never emitted.
        if subject_id is None or object_id is None:
            continue
        link = context.make("UnknownLink")
        link.id = context.make_id(subject_id, object_id)
        link.add("subject", subject_id)
        link.add("object", object_id)
        context.emit(link)
示例#9
0
def crawl_persons(context: Context):
    """Emit a Person, plus identification documents, per "personas" record.

    Dates of birth, documents, addresses, places of birth and aliases are
    read from their nested lists. Documents whose type has no entry in the
    ``doc_types`` lookup are logged and skipped.
    """
    multi_valued = (
        ("title", "TITLE"),
        ("nationality", "NATIONALITY"),
        ("position", "DESIGNATION"),
    )
    single_valued = (
        ("gender", "GENDER"),
        ("birthDate", "DATE_OF_BIRTH"),
        ("birthDate", "YEAR"),
        ("birthPlace", "CITY_OF_BIRTH"),
        ("country", "COUNTRY_OF_BIRTH"),
    )
    alias_fields = (
        ("birthDate", "DATE_OF_BIRTH"),
        ("birthDate", "YEAR"),
        ("birthPlace", "CITY_OF_BIRTH"),
        ("country", "COUNTRY_OF_BIRTH"),
    )
    for data in fetch(context, "personas"):
        person = crawl_common(context, data, "personas", "Person")
        for prop, key in multi_valued:
            person.add(prop, values(data.pop(key, None)))
        for prop, key in single_valued:
            person.add(prop, data.pop(key, None))

        for dob in data.pop("INDIVIDUAL_DATE_OF_BIRTH", []):
            # Both of these fields go through parse_date before being added.
            for key in ("DATE", "TYPE_OF_DATE"):
                person.add("birthDate", parse_date(dob.pop(key, None)))
            for key in ("YEAR", "FROM_YEAR", "TO_YEAR"):
                person.add("birthDate", dob.pop(key, None))
            h.audit_data(dob, ignore=["NOTE"])

        for doc in data.pop("INDIVIDUAL_DOCUMENT", []):
            doc_type = doc.pop("TYPE_OF_DOCUMENT", None)
            doc_number = doc.pop("NUMBER", None)
            schema = context.lookup_value("doc_types", doc_type)
            if schema is None:
                context.log.warning("Unknown document type", type=doc_type)
                continue
            identification = context.make(schema)
            identification.id = context.make_id("ID", person.id, doc_number)
            identification.add("holder", person)
            identification.add("type", doc_type)
            identification.add("number", doc_number)
            identification.add("type", doc.pop("TYPE_OF_DOCUMENT2", None))
            issued = parse_date(doc.pop("DATE_OF_ISSUE", None))
            identification.add("startDate", issued)
            for key in ("ISSUING_COUNTRY", "COUNTRY_OF_ISSUE"):
                identification.add("country", doc.pop(key, None))
            identification.add("summary", doc.pop("NOTE", None))
            context.emit(identification)
            h.audit_data(doc, ignore=["CITY_OF_ISSUE"])

        for raw_address in data.pop("INDIVIDUAL_ADDRESS", []):
            parsed = parse_address(context, raw_address)
            h.apply_address(context, person, parsed)

        for raw_place in data.pop("INDIVIDUAL_PLACE_OF_BIRTH", []):
            place = parse_address(context, raw_place)
            if place is None:
                continue
            person.add("birthPlace", place.get("full"))
            person.add("country", place.get("country"))

        for alias in data.pop("INDIVIDUAL_ALIAS", []):
            for prop, key in alias_fields:
                person.add(prop, alias.pop(key, None))
            parse_alias(person, alias)

        h.audit_data(data, ["VERSIONNUM"])
        context.emit(person, target=True)
示例#10
0
def crawl_company(context: Context, data: Dict[str, Any]):
    """Emit an Organization built from ``data`` plus its person relations.

    Related countries are mapped onto entity properties through the
    ``country_links`` lookup and related persons become relation entities
    through the ``person_relations`` lookup. Relation types missing from a
    lookup are logged and skipped. Keys left in ``data`` are audited.
    """
    company = context.make("Organization")
    company.id = company_id(context, data.pop("id"))
    company.add("sourceUrl", data.pop("url_en", None))
    data.pop("url_ru", None)
    for key in ("name_en", "name_ru", "name_suggest_output_ru"):
        company.add("name", data.pop(key, None))
    for key in ("also_known_as", "short_name_en", "short_name_ru"):
        company.add("alias", data.pop(key, None))
    company.add("incorporationDate", parse_date(data.pop("founded", None)))
    company.add("dissolutionDate", parse_date(data.pop("closed", None)))
    # Both status keys are always consumed; the Russian value is only used
    # when the English key is absent.
    status_ru = data.pop("status_ru", None)
    company.add("status", data.pop("status_en", status_ru))
    company.add("status", data.pop("status", None))
    company.add_cast("Company", "ogrnCode", data.pop("ogrn_code", None))
    company.add("registrationNumber", data.pop("edrpou", None))

    for country_link in data.pop("related_countries", []):
        rel_type = country_link.pop("relationship_type")
        country = country_link.pop("to_country_en", None)
        if not country:
            country = country_link.pop("to_country_ru")
        res = context.lookup("country_links", rel_type)
        if res is None:
            context.log.warn(
                "Unknown country link",
                rel_type=rel_type,
                entity=company,
                country=country,
            )
            continue
        if res.prop is not None:
            company.add(res.prop, country)

    relation_dates = (
        ("modifiedAt", "date_confirmed"),
        ("startDate", "date_established"),
        ("endDate", "date_finished"),
    )
    for person_link in data.pop("related_persons", []):
        wikidata_id = clean_wdid(person_link.pop("person_wikidata_id"))
        other_id = person_id(context, person_link.pop("person_id"), wikidata_id)

        type_en = person_link.pop("relationship_type_en", None)
        type_ru = person_link.pop("relationship_type_ru", None)
        rel_type = type_en or type_ru
        res = context.lookup("person_relations", rel_type)
        if res is None:
            context.log.info(
                "Unknown company/person relation type",
                rel_type=rel_type,
                entity=company,
                other=other_id,
            )
            continue
        if res.schema is None:
            continue

        if res.schema == "Organization" and res.from_prop == "asset":
            # This relation type switches the entity to the Company schema.
            company.schema = model.get("Company")

        rel = context.make(res.schema)
        own_short = short_id(context, company.id)
        other_short = short_id(context, other_id)
        rel.id = context.make_slug(own_short, res.schema, other_short)
        rel.add(res.from_prop, company.id)
        rel.add(res.to_prop, other_id)
        rel.add(res.desc_prop, rel_type)
        for prop, key in relation_dates:
            rel.add(prop, parse_date(person_link.pop(key)))
        context.emit(rel)

    # Company-to-company relations are not processed yet; the key is still
    # consumed so that the audit below does not report it.
    # TODO: handle these like related_persons, using the "company_id" of each
    # entry and a "company_relations" lookup.
    for _unused in data.pop("related_companies", []):
        pass

    street = data.pop("street", None)
    city = data.pop("city", None)
    address = h.make_address(context, street=street, city=city)
    h.apply_address(context, company, address)

    if data.pop("state_company", False):
        company.add("topics", "gov.soe")

    h.audit_data(
        data,
        ignore=[
            "wiki",
            "bank_name",
            "other_founders",
            "other_owners",
            "other_managers",
            "other_recipient",
        ],
    )
    context.emit(company)
示例#11
0
def crawl_row(context: Context, data: Dict[str, str]):
    """Emit a LegalEntity and its sanction from one source row.

    The row may use either of two column naming styles (for example
    ``INDIVIDUAL_Id`` or ``IndividualID``, ``COMMENTS`` or ``Comments``), so
    both spellings are consumed for each field. Person-only properties are
    set through ``add_cast``. Columns left over afterwards are audited.

    Raises:
        AssertionError: if no entity ID can be derived from the row.
    """
    entity = context.make("LegalEntity")
    # Pop both ID spellings with a default. The previous nested
    # ``pop(a, pop(b))`` evaluated the inner pop eagerly and raised KeyError
    # whenever only ``INDIVIDUAL_Id`` was present.
    ind_id = data.pop("INDIVIDUAL_Id", None)
    alt_id = data.pop("IndividualID", None)
    if ind_id is None:
        ind_id = alt_id
    entity.id = context.make_slug(ind_id)
    assert entity.id, data
    entity.add("notes", h.clean_note(data.pop("COMMENTS", None)))
    entity.add("notes", h.clean_note(data.pop("Comments", None)))
    entity.add("notes", h.clean_note(data.pop("NOTE", None)))
    entity.add("notes", h.clean_note(data.pop("NOTE1", None)))
    entity.add("notes", h.clean_note(data.pop("NOTE2", None)))
    entity.add("notes", h.clean_note(data.pop("NOTE3", None)))
    entity.add_cast("Person", "nationality", data.pop("NATIONALITY", None))
    entity.add_cast("Person", "nationality", data.pop("Nationality", None))
    entity.add_cast("Person", "title", data.pop("TITLE", None))
    entity.add_cast("Person", "title", data.pop("Title", None))
    entity.add_cast("Person", "position", data.pop("DESIGNATION", None))
    entity.add_cast("Person", "position", data.pop("Designation", None))
    entity.add_cast("Person", "birthPlace", data.pop("PLACEOFBIRTH", None))
    entity.add_cast("Person", "birthPlace",
                    data.pop("IndividualPlaceOfBirth", None))
    entity.add_cast("Person", "birthPlace", data.pop("CITY_OF_BIRTH", None))
    entity.add_cast("Person", "birthDate", data.pop("YEAR", None))
    entity.add_cast("Person", "gender", data.pop("GENDER", None))
    entity.add_cast("Person", "birthDate", parse_date(data.pop("DATE", None)))
    entity.add_cast("Person", "birthDate",
                    parse_date(data.pop("DATE_OF_BIRTH", None)))
    dob = parse_date(data.pop("IndividualDateOfBirth", None))
    entity.add_cast("Person", "birthDate", dob)

    # Birthplace city and state are consumed but not stored.
    data.pop("BIRTHPLACE_x0020_CITY", None)
    data.pop("BIRTHPLACE_x0020_STATE_PROVINCE", None)
    entity.add("country", data.pop("BIRTHPLACE_x0020_COUNTRY", None))
    entity.add("country", data.pop("COUNTRY_OF_BIRTH", None))
    entity.add_cast("Person", "birthPlace",
                    data.pop("BIRTHPLACE_x0020_NOTE", None))

    h.apply_name(
        entity,
        full=data.pop("FullName", None),
        given_name=data.pop("FIRST_NAME", None),
        second_name=data.pop("SECOND_NAME", None),
        name3=data.pop("THIRD_NAME", None),
        name4=data.pop("FOURTH_NAME", None),
        quiet=True,
    )

    # Original-script names containing "?" are not added as aliases.
    alias = data.pop("NAME_ORIGINAL_SCRIPT", None)
    if alias is not None and "?" not in alias:
        entity.add("alias", alias)
    entity.add("alias", data.pop("SORT_KEY", None))
    data.pop("IndividualAlias", None)

    entity.add_cast("Person", "passportNumber", data.pop("PASSPORT", None))
    entity.add_cast("Person", "passportNumber",
                    data.pop("IndividualDocument", None))
    # Issue date and city are consumed but not stored.
    data.pop("DATE_OF_ISSUE", None)
    data.pop("CITY_OF_ISSUE", None)
    entity.add("country", data.pop("COUNTRY_OF_ISSUE", None))
    entity.add_cast("Person", "idNumber", data.pop("IDNUMBER", None))

    address = h.make_address(
        context,
        # remarks=data.pop("NOTE"),
        full=data.pop("IndividualAddress", None),
        street=data.pop("STREET", None),
        city=data.pop("CITY", None),
        region=data.pop("STATE_PROVINCE", None),
        postal_code=data.pop("ZIP_CODE", None),
        country=data.pop("COUNTRY", None),
    )
    h.apply_address(context, entity, address)

    sanction = h.make_sanction(context, entity)
    inserted_at = parse_date(data.pop("DateInserted", None))
    # Both spellings are consumed; "ListedOn" is used only when "ListedON"
    # is absent.
    listed_on = data.pop("ListedON", data.pop("ListedOn", None))
    listed_at = parse_date(listed_on)
    # The entity prefers the insertion date, the sanction the listing date.
    entity.add("createdAt", inserted_at or listed_at)
    sanction.add("listingDate", listed_at or inserted_at)
    sanction.add("startDate", data.pop("FROM_YEAR", None))
    sanction.add("endDate", data.pop("TO_YEAR", None))
    sanction.add("program", data.pop("UN_LIST_TYPE", None))
    sanction.add("unscId", data.pop("REFERENCE_NUMBER", None))
    sanction.add("unscId", data.pop("ReferenceNumber", None))
    sanction.add("authority", data.pop("SUBMITTED_BY", None))

    entity.add("topics", "sanction")
    h.audit_data(data,
                 ignore=["VERSIONNUM", "TYPE_OF_DATE", "ApplicationStatus"])
    context.emit(entity, target=True)
    context.emit(sanction)
示例#12
0
def parse_row(context: Context, row):
    """Convert one row of the list into an entity plus sanction and emit them.

    Columns are consumed from ``row`` with ``pop``; whatever remains at the
    end (apart from the ignored non-Latin script columns) is passed to
    ``h.audit_data``.

    The function returns early, without emitting the main entity or its
    sanction, when the group type is not in ``TYPES`` (logged as an error) or
    when the alias type or alias quality is not recognised (logged as a
    warning). In the latter two cases a ship owner and its ownership may
    already have been emitted.
    """
    group_type = row.pop("GroupTypeDescription")
    schema = TYPES.get(group_type)
    if schema is None:
        context.log.error("Unknown group type", group_type=group_type)
        return
    entity = context.make(schema)
    entity.id = context.make_slug(row.pop("GroupID"))
    sanction = h.make_sanction(context, entity)
    sanction.add("program", row.pop("RegimeName"))
    sanction.add("authority", row.pop("ListingType", None))
    listed_date = h.parse_date(row.pop("DateListed"), FORMATS)
    sanction.add("listingDate", listed_date)
    designated_date = h.parse_date(row.pop("DateDesignated"), FORMATS)
    sanction.add("startDate", designated_date)

    # Prefer the listing date for createdAt; fall back to the designation
    # date when adding the listing date left the property empty.
    entity.add("createdAt", listed_date)
    if not entity.has("createdAt"):
        entity.add("createdAt", designated_date)

    sanction.add("authorityId", row.pop("UKSanctionsListRef", None))
    sanction.add("unscId", row.pop("UNRef", None))
    sanction.add("status", row.pop("GroupStatus", None))
    sanction.add("reason", row.pop("UKStatementOfReasons", None))

    last_updated = h.parse_date(row.pop("LastUpdated"), FORMATS)
    sanction.add("modifiedAt", last_updated)
    entity.add("modifiedAt", last_updated)

    # TODO: derive topics and schema from this??
    entity_type = row.pop("Entity_Type", None)
    entity.add_cast("LegalEntity", "legalForm", entity_type)

    reg_number = row.pop("Entity_BusinessRegNumber", None)
    entity.add_cast("LegalEntity", "registrationNumber", reg_number)

    # Ship length is consumed but not stored.
    row.pop("Ship_Length", None)
    entity.add_cast("Vessel", "flag", row.pop("Ship_Flag", None))
    flags = split_new(row.pop("Ship_PreviousFlags", None))
    entity.add_cast("Vessel", "pastFlags", flags)
    entity.add_cast("Vessel", "type", row.pop("Ship_Type", None))
    entity.add_cast("Vessel", "tonnage", row.pop("Ship_Tonnage", None))
    entity.add_cast("Vessel", "buildDate", row.pop("Ship_YearBuilt", None))
    entity.add_cast("Vessel", "imoNumber", row.pop("Ship_IMONumber", None))

    # A named ship owner becomes its own LegalEntity, linked to this entity
    # through an Ownership; both are emitted immediately.
    ship_owner = row.pop("Ship_CurrentOwners", None)
    if ship_owner is not None:
        owner = context.make("LegalEntity")
        owner.id = context.make_slug("named", ship_owner)
        owner.add("name", ship_owner)
        context.emit(owner)

        ownership = context.make("Ownership")
        ownership.id = context.make_id(entity.id, "owns", owner.id)
        ownership.add("owner", owner)
        ownership.add("asset", entity)
        context.emit(ownership)

    countries = parse_countries(row.pop("Country", None))
    entity.add("country", countries)

    title = split_items(row.pop("Title", None))
    entity.add("title", title, quiet=True)

    pobs = split_items(row.pop("Individual_TownOfBirth", None))
    entity.add_cast("Person", "birthPlace", pobs)

    dob = h.parse_date(row.pop("Individual_DateOfBirth", None), FORMATS)
    entity.add_cast("Person", "birthDate", dob)

    cob = parse_countries(row.pop("Individual_CountryOfBirth", None))
    entity.add_cast("Person", "country", cob)

    nationalities = parse_countries(row.pop("Individual_Nationality", None))
    entity.add_cast("Person", "nationality", nationalities)

    positions = split_items(row.pop("Individual_Position", None))
    entity.add_cast("Person", "position", positions)

    entity.add_cast("Person", "gender", row.pop("Individual_Gender", None))

    # The alias type picks the name property and the alias quality the weak
    # flag; rows with unknown values for either are abandoned here.
    name_type = row.pop("AliasType", None)
    name_prop = NAME_TYPES.get(name_type)
    if name_prop is None:
        context.log.warning("Unknown name type", type=name_type)
        return
    name_quality = row.pop("AliasQuality", None)
    is_weak = WEAK_QUALITY.get(name_quality)
    if is_weak is None:
        context.log.warning("Unknown name quality", quality=name_quality)
        return

    h.apply_name(
        entity,
        name1=row.pop("name1", None),
        name2=row.pop("name2", None),
        name3=row.pop("name3", None),
        name4=row.pop("name4", None),
        name5=row.pop("name5", None),
        tail_name=row.pop("Name6", None),
        name_prop=name_prop,
        is_weak=is_weak,
        quiet=True,
    )
    entity.add("alias", row.pop("NameNonLatinScript", None))

    # The six address columns are combined into one comma-separated string.
    full_address = join_text(
        row.pop("Address1", None),
        row.pop("Address2", None),
        row.pop("Address3", None),
        row.pop("Address4", None),
        row.pop("Address5", None),
        row.pop("Address6", None),
        sep=", ",
    )

    address = h.make_address(
        context,
        full=full_address,
        postal_code=row.pop("PostCode", None),
        country=first(countries),
    )
    h.apply_address(context, entity, address)

    passport_number = row.pop("Individual_PassportNumber", None)
    passport_numbers = split_items(passport_number)
    entity.add_cast("Person", "passportNumber", passport_numbers)
    # Consumed so the audit does not flag it; the value is currently unused.
    passport_detail = row.pop("Individual_PassportDetails", None)
    # passport_details = split_items(passport_detail)
    # TODO: where do I stuff this?

    ni_number = row.pop("Individual_NINumber", None)
    ni_numbers = split_items(ni_number)
    entity.add_cast("Person", "idNumber", ni_numbers)
    # Consumed so the audit does not flag it; the value is currently unused.
    ni_detail = row.pop("Individual_NIDetails", None)
    # ni_details = split_items(ni_detail)
    # TODO: where do I stuff this?

    for phone in split_new(row.pop("PhoneNumber", None)):
        entity.add_cast("LegalEntity", "phone", phone)

    for email in split_new(row.pop("EmailAddress", None)):
        entity.add_cast("LegalEntity", "email", email)

    for website in split_new(row.pop("Website", None)):
        entity.add_cast("LegalEntity", "website", website)

    # Each parent company becomes an Organization that owns this entity.
    for name in parse_companies(context, row.pop("Entity_ParentCompany",
                                                 None)):
        parent = context.make("Organization")
        parent.id = context.make_slug("named", name)
        parent.add("name", name)
        context.emit(parent)

        ownership = context.make("Ownership")
        ownership.id = context.make_id(entity.id, "owns", parent.id)
        ownership.add("owner", parent)
        ownership.add("asset", entity)
        context.emit(ownership)

    # Each subsidiary becomes a Company that is owned by this entity.
    for name in parse_companies(context, row.pop("Entity_Subsidiaries", None)):
        subsidiary = context.make("Company")
        subsidiary.id = context.make_slug("named", name)
        subsidiary.add("name", name)
        context.emit(subsidiary)

        ownership = context.make("Ownership")
        ownership.id = context.make_id(entity.id, "owns", subsidiary.id)
        ownership.add("owner", entity)
        ownership.add("asset", subsidiary)
        context.emit(ownership)

    # Any status other than "A" is only reported; the row is still emitted.
    grp_status = row.pop("GrpStatus", None)
    if grp_status != "A":
        context.log.warning("Unknown GrpStatus", value=grp_status)

    entity.add("notes", h.clean_note(row.pop("OtherInformation", None)))
    h.audit_data(row, ignore=["NonLatinScriptLanguage", "NonLatinScriptType"])

    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    context.emit(sanction)
示例#13
0
def parse_result(context: Context, result) -> None:
    """Turn one result record into an entity and a Sanction, and emit both.

    Keys are popped from ``result`` as they are consumed, so the mapping is
    mutated in place. Whatever is left at the end is passed to
    ``h.audit_data`` (with ``standard_order`` ignored).

    Args:
        context: Crawler context used for type lookup, logging, entity
            creation and emission.
        result: Mutable mapping describing a single record. The keys ``id``,
            ``source_information_url`` and ``source_list_url`` are popped
            without a default and therefore must be present; each item in
            ``ids`` must carry ``country``, ``type`` and ``number``.

    Raises:
        KeyError: If one of the mandatory keys listed above is missing.
        AssertionError: If person-only fields remain on a record that was
            not handled by the Person branch, or if ``entity_number`` is
            falsy after integer conversion.
    """
    type_ = result.pop("type", None)
    schema = context.lookup_value("type", type_)
    if schema is None:
        context.log.error("Unknown result type", type=type_)
        return
    entity = context.make(schema)
    entity.id = context.make_slug(result.pop("id"))

    # When an entity number is supplied it overrides the ID derived above,
    # using a slug built for the "us_ofac_sdn" dataset.
    entity_number = result.pop("entity_number", None)
    if entity_number is not None:
        assert int(entity_number)
        entity.id = context.make_slug(entity_number, dataset="us_ofac_sdn")

    name = result.pop("name", None)
    if name is not None:
        # Only strip the boilerplate suffix when a name is actually present;
        # calling .replace() on a missing name would raise AttributeError.
        name = name.replace(
            "and any successor, sub-unit, or subsidiary thereof",
            "",
        )
    entity.add("name", name)
    for alias in ensure_list(result.pop("alt_names", "")):
        entity.add("alias", alias.split("; "))
    entity.add("notes", result.pop("remarks", None))
    entity.add("country", result.pop("country", None))
    if entity.schema.is_a("Person"):
        entity.add("position", result.pop("title", None))
        entity.add("nationality", result.pop("nationalities", None))
        entity.add("nationality", result.pop("citizenships", None))
        for dob in result.pop("dates_of_birth", []):
            entity.add("birthDate", h.parse_date(dob, FORMATS))
        entity.add("birthPlace", result.pop("places_of_birth", None))
    elif entity.schema.is_a("Vessel"):
        entity.add("flag", result.pop("vessel_flag", None))
        entity.add("callSign", result.pop("call_sign", None))
        entity.add("type", result.pop("vessel_type", None))
        grt = result.pop("gross_registered_tonnage", None)
        entity.add("grossRegisteredTonnage", grt)
        gt = result.pop("gross_tonnage", None)
        entity.add("tonnage", gt)

        # TODO: make adjacent owner entity
        result.pop("vessel_owner", None)

    # The Person branch has already consumed these keys; for any other
    # schema they are expected to be absent or empty.
    assert result.pop("title", None) is None
    assert not len(result.pop("nationalities", []))
    assert not len(result.pop("citizenships", []))
    assert not len(result.pop("dates_of_birth", []))
    assert not len(result.pop("places_of_birth", []))

    for address in result.pop("addresses", []):
        obj = h.make_address(
            context,
            street=address.get("address"),
            city=address.get("city"),
            postal_code=address.get("postal_code"),
            region=address.get("state"),
            country=address.get("country"),
        )
        h.apply_address(context, entity, obj)

    for ident in result.pop("ids", []):
        # The identifier's country is recorded on the entity itself and also
        # forwarded to the feature helper.
        country = ident.pop("country")
        entity.add("country", country)
        h.apply_feature(
            context,
            entity,
            ident.pop("type"),
            ident.pop("number"),
            country=country,
            date_formats=FORMATS,
            start_date=ident.pop("issue_date", None),
            end_date=ident.pop("expiration_date", None),
        )

    sanction = context.make("Sanction")
    sanction.id = context.make_id(entity.id, "Sanction")
    sanction.add("entity", entity)
    sanction.add("program", result.pop("programs", []))
    sanction.add("provisions", result.pop("license_policy", []))
    sanction.add("reason", result.pop("license_requirement", []))
    sanction.add("authorityId", result.pop("federal_register_notice", None))
    sanction.add("startDate", result.pop("start_date", None))
    sanction.add("endDate", result.pop("end_date", None))
    sanction.add("country", "us")
    sanction.add("authority", result.pop("source", None))

    # TODO: deref
    source_url = deref_url(context, result.pop("source_information_url"))
    sanction.add("sourceUrl", source_url)
    # Consumed only so that it does not show up in the audit below.
    result.pop("source_list_url")

    context.emit(sanction)
    context.emit(entity, target=True)

    h.audit_data(result, ignore=["standard_order"])