Пример #1
0
    def _serialize(self, obj):
        """Expand a raw entity dict into its API representation.

        Resolves the owning collection and every entity-typed property
        value, attaches API/UI/archive links and adds the ``latinized``,
        ``writeable`` and ``shallow`` fields. ``obj`` is modified in place
        and returned.
        """
        entity_id = obj.get("id")
        obj["collection"] = self.resolve(
            Collection, obj.pop("collection_id", None), CollectionSerializer
        )
        proxy = model.get_proxy(obj)
        properties = obj.get("properties", {})

        # Replace referenced entity IDs with their resolved form; an ID that
        # cannot be resolved is kept unchanged.
        for prop in proxy.iterprops():
            if prop.type == registry.entity:
                references = ensure_list(properties.get(prop.name))
                properties[prop.name] = [
                    self.resolve(Entity, ref, EntitySerializer) or ref
                    for ref in references
                ]

        def _archive_link(file_hash, file_name, mime_type):
            # All archive links share the expiry of the current authz.
            return archive_url(
                file_hash,
                file_name=file_name,
                mime_type=mime_type,
                expire=request.authz.expire,
            )

        links = {
            "self": url_for("entities_api.view", entity_id=entity_id),
            "references": url_for(
                "entities_api.references", entity_id=entity_id
            ),
            "tags": url_for("entities_api.tags", entity_id=entity_id),
            "ui": entity_url(entity_id),
        }
        if proxy.schema.is_a(Document.SCHEMA):
            # Original file: the MIME type comes from the entity itself.
            original_hash = first(properties.get("contentHash"))
            if original_hash:
                links["file"] = _archive_link(
                    original_hash,
                    entity_filename(proxy),
                    first(properties.get("mimeType")),
                )

            # Derived PDF/CSV renditions use a fixed extension and MIME type.
            renditions = (("pdf", "pdfHash", PDF), ("csv", "csvHash", CSV))
            for kind, hash_prop, mime_type in renditions:
                rendition_hash = first(properties.get(hash_prop))
                if rendition_hash:
                    links[kind] = _archive_link(
                        rendition_hash,
                        entity_filename(proxy, extension=kind),
                        mime_type,
                    )

        obj["links"] = links
        obj["latinized"] = transliterate_values(proxy)
        obj["writeable"] = check_write_entity(obj, request.authz)
        # Entities count as shallow unless explicitly marked otherwise.
        obj.setdefault("shallow", True)
        return obj
Пример #2
0
    def _serialize(self, obj):
        """Expand a raw entity dict into its API representation.

        Stringifies the ID, resolves the collection and entity-typed
        property values, and attaches links plus the ``writeable`` flag.
        Returns ``None`` when the entity's schema is not known to the model;
        otherwise returns the result of ``self._clean_response(obj)``.
        """
        entity_id = obj.get("id")
        obj["id"] = str(entity_id)
        authz = request.authz
        collection_id = obj.pop("collection_id", None)
        obj["collection"] = self.resolve(
            Collection, collection_id, CollectionSerializer
        )

        schema = model.get(obj.get("schema"))
        if schema is None:
            return None
        obj["schemata"] = schema.names

        # Replace referenced entity IDs by whatever the resolver returns for
        # them (no fallback to the raw ID in this version).
        properties = obj.get("properties", {})
        for prop in schema.properties.values():
            if prop.type == registry.entity:
                referenced = ensure_list(properties.get(prop.name))
                properties[prop.name] = [
                    self.resolve(Entity, ref, EntitySerializer)
                    for ref in referenced
                ]

        links = {
            "self": url_for("entities_api.view", entity_id=entity_id),
            "references": url_for(
                "entities_api.references", entity_id=entity_id
            ),
            "tags": url_for("entities_api.tags", entity_id=entity_id),
            "ui": entity_url(entity_id),
        }
        if schema.is_a(Document.SCHEMA):
            links["content"] = url_for(
                "entities_api.content", entity_id=entity_id
            )
            file_name = first(properties.get("fileName"))

            # Original file, served with the MIME type stored on the entity.
            content_hash = first(properties.get("contentHash"))
            if content_hash:
                mime_type = first(properties.get("mimeType"))
                download_name = safe_filename(file_name, default=entity_id)
                links["file"] = archive_url(
                    authz.id,
                    content_hash,
                    file_name=download_name,
                    mime_type=mime_type,
                )

            # Derived renditions with a fixed extension and MIME type.
            renditions = (
                ("pdf", "pdfHash", ".pdf", PDF),
                ("csv", "csvHash", ".csv", CSV),
            )
            for kind, hash_prop, extension, mime_type in renditions:
                rendition_hash = first(properties.get(hash_prop))
                if not rendition_hash:
                    continue
                download_name = safe_filename(
                    file_name, default=entity_id, extension=extension
                )
                links[kind] = archive_url(
                    authz.id,
                    rendition_hash,
                    file_name=download_name,
                    mime_type=mime_type,
                )

        obj["links"] = links
        obj["writeable"] = authz.can(collection_id, authz.WRITE)
        obj.pop("_index", None)
        return self._clean_response(obj)
Пример #3
0
    def _serialize(self, obj):
        """Expand a raw entity dict into its API representation.

        Resolves the collection and entity-typed property values, lists the
        schemata of the entity, and attaches links and the ``writeable``
        flag. ``obj`` is modified in place and returned.
        """
        entity_id = obj.get("id")
        collection_id = obj.pop("collection_id", None)
        obj["collection"] = self.resolve(
            Collection, collection_id, CollectionSerializer
        )
        proxy = model.get_proxy(obj)
        obj["schemata"] = proxy.schema.names

        # Inline referenced entities; unresolvable IDs stay as plain values.
        properties = obj.get("properties", {})
        for prop in proxy.iterprops():
            if prop.type == registry.entity:
                properties[prop.name] = [
                    self.resolve(Entity, ref, EntitySerializer) or ref
                    for ref in ensure_list(properties.get(prop.name))
                ]

        def _archive_link(file_hash, file_name, mime_type):
            # Archive URLs are issued for the requesting role.
            return archive_url(
                request.authz.id,
                file_hash,
                file_name=file_name,
                mime_type=mime_type,
            )

        links = {
            "self": url_for("entities_api.view", entity_id=entity_id),
            "references": url_for(
                "entities_api.references", entity_id=entity_id
            ),
            "tags": url_for("entities_api.tags", entity_id=entity_id),
            "ui": entity_url(entity_id),
        }
        if proxy.schema.is_a(Document.SCHEMA):
            links["content"] = url_for(
                "entities_api.content", entity_id=entity_id
            )

            original_hash = first(properties.get("contentHash"))
            if original_hash:
                links["file"] = _archive_link(
                    original_hash,
                    entity_filename(proxy),
                    first(properties.get("mimeType")),
                )

            for kind, hash_prop, mime_type in (
                ("pdf", "pdfHash", PDF),
                ("csv", "csvHash", CSV),
            ):
                rendition_hash = first(properties.get(hash_prop))
                if rendition_hash:
                    links[kind] = _archive_link(
                        rendition_hash,
                        entity_filename(proxy, extension=kind),
                        mime_type,
                    )

        obj["links"] = links
        authz = request.authz
        obj["writeable"] = authz.can(collection_id, authz.WRITE)
        return obj
Пример #4
0
def format_proxy(proxy, collection):
    """Apply final denormalisations to the index.

    Returns ``None`` for proxies with an abstract schema. Otherwise returns
    a dict with ``_id``, ``_index`` and ``_source`` keys, where ``_source``
    is the proxy's full dict enriched with derived fields (schemata,
    caption, fingerprints, text, numeric values) and collection context.
    """
    # Abstract entities can appear when profile fragments for a missing
    # entity are present.
    if proxy.schema.abstract:
        return None

    data = proxy.to_full_dict()
    data["schemata"] = list(proxy.schema.names)
    data["caption"] = proxy.caption

    # Fingerprints cover both the generated forms and the raw names.
    names = data.get("names", [])
    fps = {fingerprints.generate(name) for name in names}
    fps.update(names)
    data["fingerprints"] = [fp for fp in fps if fp is not None]

    # Slight hack: a magic property in followthemoney that gets taken out
    # of the properties and added straight to the index text.
    data["text"] = data.get("properties").pop("indexText", [])

    # Numeric casts of typed properties, plus the grouped date field.
    numeric = {
        prop.name: _numeric_values(prop.type, proxy.get(prop))
        for prop in proxy.iterprops()
        if prop.type in NUMERIC_TYPES
    }
    numeric["dates"] = _numeric_values(registry.date, data.get("dates"))
    data["numeric"] = numeric

    # Context data - from aleph system, not followthemoney.
    data["collection_id"] = collection.id
    data["role_id"] = first(data.get("role_id"))
    data["profile_id"] = first(data.get("profile_id"))
    data["mutable"] = max(ensure_list(data.get("mutable")), default=False)
    data["origin"] = ensure_list(data.get("origin"))

    # Collapse timestamp lists: earliest creation, latest update. Without
    # any update timestamps, the creation timestamps stand in for them.
    created = ensure_list(data.get("created_at"))
    if created:
        data["created_at"] = min(created)
    updated = ensure_list(data.get("updated_at")) or created
    if updated:
        data["updated_at"] = max(updated)

    return {
        "_id": data.pop("id"),
        "_index": entities_write_index(proxy.schema),
        "_source": data,
    }
Пример #5
0
 def make_filename(self, entity):
     """Choose a temporary file name that carries a usable extension.

     Some file importers care about the extension. The first ``fileName``
     value that has one is used (after sanitising); otherwise the name
     ``data`` is combined with the entity's ``extension``, an extension
     guessed from its ``mimeType``, or ``bin`` as the last resort.
     """
     for candidate in entity.get("fileName", quiet=True):
         if os.path.splitext(candidate)[1]:
             return safe_filename(candidate)

     suffix = first(entity.get("extension", quiet=True))
     if suffix is None:
         mime = first(entity.get("mimeType", quiet=True))
         suffix = None if mime is None else guess_extension(mime)
     return safe_filename("data", extension=suffix or "bin")
Пример #6
0
    def pick(self, values: Sequence[str]) -> Optional[str]:
        """From a set of names, pick the most plausible user-facing one.

        Returns ``None`` when there are no values or none of them survives
        normalisation.
        """
        # Sorting keeps the outcome stable when several names tie.
        ordered = sorted(values)
        if not ordered:
            return None

        # Two stages: first find the median among normalised forms, so that
        # casing variants (e.g. Smith vs. SMITH) are not counted as wildly
        # different names, then choose among the original spellings that
        # share that normalised form.
        slugs: List[str] = []
        by_slug: Dict[str, List[str]] = {}
        for name in ordered:
            slug = slugify(name, sep=" ")
            if slug is not None:
                slugs.append(slug)
                by_slug.setdefault(slug, []).append(name)

        median_slug = setmedian(slugs)
        if median_slug is None:
            return None

        candidates = by_slug.get(median_slug, [])
        if len(candidates) > 1:
            return cast(str, setmedian(candidates))
        return first(candidates)
Пример #7
0
    def expand(self, objs, many=False):
        """Replace reference fields on ``objs`` with the dumped objects.

        Works in two passes: every ``(type, key)`` referenced by a field
        listed in ``self.EXPAND`` is first registered in a cache, the cache
        is filled by ``_resolve_roles`` / ``_resolve_index``, and then each
        object gets its source field removed and, when something was found,
        the serialised value stored under the target name. ``objs`` is
        modified in place; ``many`` is accepted but not used here.
        """
        items = ensure_list(objs)

        # Pass 1: register every reference that needs to be looked up.
        cache = {}
        for item in items:
            for field, kind, _target, _schema, _multi in self.EXPAND:
                kind = self._type_dispatch(kind)
                for key in self._get_values(item, field):
                    cache[(kind, key)] = None

        self._resolve_roles(cache)
        self._resolve_index(cache)

        # Pass 2: substitute the resolved objects.
        for item in items:
            for field, kind, target, schema, multi in self.EXPAND:
                kind = self._type_dispatch(kind)
                resolved = [
                    cache.get((kind, key))
                    for key in self._get_values(item, field)
                ]
                if not multi:
                    resolved = first(resolved)

                item.pop(field, None)
                if resolved is None:
                    continue
                dumped, _errors = schema().dump(resolved, many=multi)
                item[target] = dumped
Пример #8
0
def get_table_csv_link(table_id):
    """Return a URL (or local path) for the CSV version of a table entity.

    The archive is asked for a URL for the table's ``csvHash`` first; if it
    does not provide one, the file is loaded locally and its POSIX path is
    used instead.

    Raises:
        RuntimeError: if the table has no ``csvHash`` value, or if neither a
            URL nor a local file could be obtained for it.
    """
    table = get_entity(table_id)
    properties = table.get('properties', {})
    csv_hash = first(properties.get('csvHash'))
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")
    url = archive.generate_url(csv_hash)
    if not url:
        # No URL available from the archive: fall back to a local copy.
        local_path = archive.load_file(csv_hash)
        if local_path is not None:
            url = local_path.as_posix()
    # Use the same truthiness test as the fallback above, so that an empty
    # (but not None) URL cannot be returned when no local file exists.
    if not url:
        raise RuntimeError("Could not generate CSV URL for the table")
    return url
Пример #9
0
def make_mapper(collection, mapping):
    """Build a model mapping that reads the CSV of the mapping's table.

    The CSV location is the archive URL for the table's ``csvHash`` or, if
    the archive does not provide one, the POSIX path of a locally loaded
    copy. The mapping is created from ``mapping.query`` with the
    collection's ``foreign_id`` as key prefix.

    Raises:
        RuntimeError: if the table has no ``csvHash`` value, or if neither a
            URL nor a local file could be obtained for it.
    """
    table = get_entity(mapping.table_id)
    properties = table.get('properties', {})
    csv_hash = first(properties.get('csvHash'))
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")
    url = archive.generate_url(csv_hash)
    if not url:
        # No URL available from the archive: fall back to a local copy.
        local_path = archive.load_file(csv_hash)
        if local_path is not None:
            url = local_path.as_posix()
    # Use the same truthiness test as the fallback above, so that an empty
    # (but not None) URL never ends up as the mapping's ``csv_url``.
    if not url:
        raise RuntimeError("Could not generate CSV URL for the table")
    data = {'csv_url': url, 'entities': mapping.query}
    return model.make_mapping(data, key_prefix=collection.foreign_id)
Пример #10
0
def load_locations(context: Context, doc):
    """Build and emit an address for each ``Location`` element in ``doc``.

    Returns a dict mapping the location's ``ID`` attribute to the address
    created for it. Locations whose address ends up without an ID are
    neither emitted nor included in the result.
    """
    addresses = {}
    for location in doc.findall("./Locations/Location"):
        location_id = location.get("ID")

        # Countries may come from area codes as well as country references.
        countries = set()
        for area_el in location.findall("./LocationAreaCode"):
            area = ref_get("AreaCode", area_el.get("AreaCodeID"))
            countries.add(area.get("Description"))
        for country_el in location.findall("./LocationCountry"):
            country_ref = ref_get("Country", country_el.get("CountryID"))
            countries.add(country_ref.get("Value"))

        if len(countries) > 1:
            context.log.warn("Multiple countries", countries=countries)

        # Address parts keyed by the resolved part type.
        parts = {
            ref_value(
                "LocPartType", part_el.get("LocPartTypeID")
            ): part_el.findtext("./LocationPartValue/Value")
            for part_el in location.findall("./LocationPart")
        }

        # An "Unknown" part that cleans as a country overrides the country
        # taken from the references above.
        unknown = parts.get("Unknown")
        country = first(countries)
        if registry.country.clean(unknown, fuzzy=True):
            country = unknown

        if country == "undetermined":
            country = None
            unknown = None

        address = h.make_address(
            context,
            full=unknown,
            street=parts.get("ADDRESS1"),
            street2=parts.get("ADDRESS2"),
            street3=parts.get("ADDRESS3"),
            city=parts.get("CITY"),
            postal_code=parts.get("POSTAL CODE"),
            region=parts.get("REGION"),
            state=parts.get("STATE/PROVINCE"),
            country=country,
        )
        if address.id is None:
            continue
        context.emit(address)
        addresses[location_id] = address
    return addresses
Пример #11
0
    def expand(self, objs, many=False):
        """Replace reference fields on ``objs`` with the dumped objects.

        For every entry of ``self.EXPAND`` the keys found in the source
        field are looked up through ``_get_object`` (each distinct
        ``(type, key)`` only once per call), the source field is removed
        and, when something was found, its serialised form is stored under
        the target name. ``objs`` is modified in place; ``many`` is accepted
        but not used here.
        """
        looked_up = {}
        for item in ensure_list(objs):
            for field, kind, target, schema, multi in self.EXPAND:
                resolved = []
                for key in self._get_values(item, field):
                    cache_key = (kind, key)
                    if cache_key not in looked_up:
                        looked_up[cache_key] = self._get_object(kind, key)
                    resolved.append(looked_up[cache_key])

                if not multi:
                    resolved = first(resolved)

                item.pop(field, None)
                if resolved is None:
                    continue
                dumped, _errors = schema().dump(resolved, many=multi)
                item[target] = dumped
Пример #12
0
def index(collection_id):
    """Returns a list of mappings for the collection and table.
    ---
    get:
      summary: List mappings
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - description: The table id.
        in: query
        name: table
        schema:
          type: string
      requestBody:
      responses:
        '200':
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
        - Collection
        - Mapping
    """
    collection = get_db_collection(collection_id)
    parser = QueryParser(request.args, request.authz)
    # Restrict to a single table when a "table" filter was supplied.
    mappings = Mapping.by_collection(
        collection.id,
        table_id=first(parser.filters.get("table")),
    )
    return MappingSerializer.jsonify_result(
        DatabaseQueryResult(request, mappings, parser=parser)
    )
Пример #13
0
def format_proxy(proxy, collection):
    """Apply final denormalisations to the index.

    Returns a dict with ``_id``, ``_index`` and ``_source`` keys, where
    ``_source`` is the proxy's full dict enriched with derived fields
    (schemata, fingerprints, text, numeric values) and context data such as
    timestamps, role, origin and the collection ID.
    """
    data = proxy.to_full_dict()
    data['schemata'] = list(proxy.schema.names)

    # Fingerprints cover both the generated forms and the raw names.
    names = ensure_list(data.get('names'))
    fps = {fingerprints.generate(name) for name in names}
    fps.update(names)
    data['fingerprints'] = [fp for fp in fps if fp is not None]

    # Slight hack: a magic property in followthemoney that gets taken out
    # of the properties and added straight to the index text.
    properties = data.get('properties')
    text = properties.pop('indexText', [])
    text.extend(fps)
    data['text'] = text

    # integer casting
    numeric = {}
    for prop in proxy.iterprops():
        if prop.type in NUMERIC_TYPES:
            values = proxy.get(prop)
            numeric[prop.name] = _numeric_values(prop.type, values)
    # also cast group field for dates
    numeric['dates'] = _numeric_values(registry.date, data.get('dates'))
    data['numeric'] = numeric

    # Context data - from aleph system, not followthemoney.
    now = iso_text(datetime.utcnow())
    # Creation is the earliest known timestamp ...
    data['created_at'] = min(ensure_list(data.get('created_at')), default=now)
    # ... while the last update is the latest one. Taking the minimum here
    # would report the oldest update as the most recent change.
    data['updated_at'] = max(ensure_list(data.get('updated_at')), default=now)
    # FIXME: Can there ever really be multiple role_ids?
    data['role_id'] = first(data.get('role_id'))
    data['mutable'] = max(ensure_list(data.get('mutable')), default=False)
    data['origin'] = ensure_list(data.get('origin'))
    data['collection_id'] = collection.id
    # log.info("%s", pformat(data))
    entity_id = data.pop('id')
    return {
        '_id': entity_id,
        '_index': entities_write_index(data.get('schema')),
        '_source': data
    }
Пример #14
0
def format_proxy(proxy, collection):
    """Apply final denormalisations to the index.

    Returns a dict with ``_id``, ``_index`` and ``_source`` keys, where
    ``_source`` is the proxy's full dict enriched with derived fields
    (schemata, fingerprints, text, numeric values) and context data.
    """
    data = proxy.to_full_dict()
    data["schemata"] = list(proxy.schema.names)

    # Fingerprints cover both the generated forms and the raw names.
    names = data.get("names", [])
    fps = {fingerprints.generate(name) for name in names}
    fps.update(names)
    data["fingerprints"] = [fp for fp in fps if fp is not None]

    # Slight hack: a magic property in followthemoney that gets taken out
    # of the properties and added straight to the index text.
    index_text = data.get("properties").pop("indexText", [])
    index_text.extend(fps)
    data["text"] = index_text

    # Numeric casts of typed properties, plus the grouped date field.
    numeric = {
        prop.name: _numeric_values(prop.type, proxy.get(prop))
        for prop in proxy.iterprops()
        if prop.type in NUMERIC_TYPES
    }
    numeric["dates"] = _numeric_values(registry.date, data.get("dates"))
    data["numeric"] = numeric

    # Context data - from aleph system, not followthemoney.
    # FIXME: Can there ever really be multiple role_ids?
    data["role_id"] = first(data.get("role_id"))
    data["mutable"] = max(ensure_list(data.get("mutable")), default=False)
    data["origin"] = ensure_list(data.get("origin"))

    # When a creation time is known, make sure an update time exists too,
    # defaulting it to the creation time.
    created_at = data.get("created_at")
    if created_at:
        data["updated_at"] = data.get("updated_at", created_at)
    data["collection_id"] = collection.id

    return {
        "_id": data.pop("id"),
        "_index": entities_write_index(data.get("schema")),
        "_source": data,
    }
Пример #15
0
 def pick(self, values):
     """Pick one representative value out of ``values``.

     Values are sanitised first and those that sanitise to ``None`` are
     dropped. With at most one value left, that value (if any) is returned;
     otherwise the set median of the sorted values is.
     """
     cleaned = []
     for raw in ensure_list(values):
         text = sanitize_text(raw)
         if text is not None:
             cleaned.append(text)
     if len(cleaned) > 1:
         return setmedian(sorted(cleaned))
     return first(cleaned)
Пример #16
0
def parse_row(context: Context, row):
    """Convert one source row into an entity plus its sanction and emit both.

    Column values are consumed from ``row`` with ``pop`` as they are mapped,
    and the remainder is handed to ``h.audit_data`` at the end (presumably
    to report columns that were not handled -- confirm against the helper).

    Returns ``None`` in all cases. The function returns early, without
    emitting the main entity or its sanction, when the group type, the alias
    type or the alias quality is not recognised. Note that a ship owner and
    its ownership link are emitted before the alias checks, so they may
    already have been emitted when one of those early returns is taken.
    """
    # The group type selects the schema of the entity; unknown types are
    # logged and the row is skipped.
    group_type = row.pop("GroupTypeDescription")
    schema = TYPES.get(group_type)
    if schema is None:
        context.log.error("Unknown group type", group_type=group_type)
        return
    entity = context.make(schema)
    entity.id = context.make_slug(row.pop("GroupID"))
    sanction = h.make_sanction(context, entity)
    sanction.add("program", row.pop("RegimeName"))
    sanction.add("authority", row.pop("ListingType", None))
    listed_date = h.parse_date(row.pop("DateListed"), FORMATS)
    sanction.add("listingDate", listed_date)
    designated_date = h.parse_date(row.pop("DateDesignated"), FORMATS)
    sanction.add("startDate", designated_date)

    # Prefer the listing date as creation date; fall back to the designation
    # date when the listing date did not produce a value.
    entity.add("createdAt", listed_date)
    if not entity.has("createdAt"):
        entity.add("createdAt", designated_date)

    sanction.add("authorityId", row.pop("UKSanctionsListRef", None))
    sanction.add("unscId", row.pop("UNRef", None))
    sanction.add("status", row.pop("GroupStatus", None))
    sanction.add("reason", row.pop("UKStatementOfReasons", None))

    last_updated = h.parse_date(row.pop("LastUpdated"), FORMATS)
    sanction.add("modifiedAt", last_updated)
    entity.add("modifiedAt", last_updated)

    # TODO: derive topics and schema from this??
    entity_type = row.pop("Entity_Type", None)
    entity.add_cast("LegalEntity", "legalForm", entity_type)

    reg_number = row.pop("Entity_BusinessRegNumber", None)
    entity.add_cast("LegalEntity", "registrationNumber", reg_number)

    # Vessel details. The ship length is consumed but not mapped to any
    # property.
    row.pop("Ship_Length", None)
    entity.add_cast("Vessel", "flag", row.pop("Ship_Flag", None))
    flags = split_new(row.pop("Ship_PreviousFlags", None))
    entity.add_cast("Vessel", "pastFlags", flags)
    entity.add_cast("Vessel", "type", row.pop("Ship_Type", None))
    entity.add_cast("Vessel", "tonnage", row.pop("Ship_Tonnage", None))
    entity.add_cast("Vessel", "buildDate", row.pop("Ship_YearBuilt", None))
    entity.add_cast("Vessel", "imoNumber", row.pop("Ship_IMONumber", None))

    # The current owner becomes its own LegalEntity, linked to this entity
    # through an Ownership; both are emitted right away.
    ship_owner = row.pop("Ship_CurrentOwners", None)
    if ship_owner is not None:
        owner = context.make("LegalEntity")
        owner.id = context.make_slug("named", ship_owner)
        owner.add("name", ship_owner)
        context.emit(owner)

        ownership = context.make("Ownership")
        ownership.id = context.make_id(entity.id, "owns", owner.id)
        ownership.add("owner", owner)
        ownership.add("asset", entity)
        context.emit(ownership)

    countries = parse_countries(row.pop("Country", None))
    entity.add("country", countries)

    title = split_items(row.pop("Title", None))
    entity.add("title", title, quiet=True)

    # Person-specific details.
    pobs = split_items(row.pop("Individual_TownOfBirth", None))
    entity.add_cast("Person", "birthPlace", pobs)

    dob = h.parse_date(row.pop("Individual_DateOfBirth", None), FORMATS)
    entity.add_cast("Person", "birthDate", dob)

    cob = parse_countries(row.pop("Individual_CountryOfBirth", None))
    entity.add_cast("Person", "country", cob)

    nationalities = parse_countries(row.pop("Individual_Nationality", None))
    entity.add_cast("Person", "nationality", nationalities)

    positions = split_items(row.pop("Individual_Position", None))
    entity.add_cast("Person", "position", positions)

    entity.add_cast("Person", "gender", row.pop("Individual_Gender", None))

    # The alias type decides which name property is filled and the alias
    # quality whether the name counts as weak. Rows with an unknown type or
    # quality are logged and abandoned here.
    name_type = row.pop("AliasType", None)
    name_prop = NAME_TYPES.get(name_type)
    if name_prop is None:
        context.log.warning("Unknown name type", type=name_type)
        return
    name_quality = row.pop("AliasQuality", None)
    is_weak = WEAK_QUALITY.get(name_quality)
    if is_weak is None:
        context.log.warning("Unknown name quality", quality=name_quality)
        return

    h.apply_name(
        entity,
        name1=row.pop("name1", None),
        name2=row.pop("name2", None),
        name3=row.pop("name3", None),
        name4=row.pop("name4", None),
        name5=row.pop("name5", None),
        tail_name=row.pop("Name6", None),
        name_prop=name_prop,
        is_weak=is_weak,
        quiet=True,
    )
    entity.add("alias", row.pop("NameNonLatinScript", None))

    # The six address lines are joined into a single full-address string.
    full_address = join_text(
        row.pop("Address1", None),
        row.pop("Address2", None),
        row.pop("Address3", None),
        row.pop("Address4", None),
        row.pop("Address5", None),
        row.pop("Address6", None),
        sep=", ",
    )

    address = h.make_address(
        context,
        full=full_address,
        postal_code=row.pop("PostCode", None),
        country=first(countries),
    )
    h.apply_address(context, entity, address)

    passport_number = row.pop("Individual_PassportNumber", None)
    passport_numbers = split_items(passport_number)
    entity.add_cast("Person", "passportNumber", passport_numbers)
    # Consumed from the row, but the value is currently not used.
    passport_detail = row.pop("Individual_PassportDetails", None)
    # passport_details = split_items(passport_detail)
    # TODO: where do I stuff this?

    ni_number = row.pop("Individual_NINumber", None)
    ni_numbers = split_items(ni_number)
    entity.add_cast("Person", "idNumber", ni_numbers)
    # Consumed from the row, but the value is currently not used.
    ni_detail = row.pop("Individual_NIDetails", None)
    # ni_details = split_items(ni_detail)
    # TODO: where do I stuff this?

    for phone in split_new(row.pop("PhoneNumber", None)):
        entity.add_cast("LegalEntity", "phone", phone)

    for email in split_new(row.pop("EmailAddress", None)):
        entity.add_cast("LegalEntity", "email", email)

    for website in split_new(row.pop("Website", None)):
        entity.add_cast("LegalEntity", "website", website)

    # Parent companies: each becomes an Organization that owns this entity.
    for name in parse_companies(context, row.pop("Entity_ParentCompany",
                                                 None)):
        parent = context.make("Organization")
        parent.id = context.make_slug("named", name)
        parent.add("name", name)
        context.emit(parent)

        ownership = context.make("Ownership")
        ownership.id = context.make_id(entity.id, "owns", parent.id)
        ownership.add("owner", parent)
        ownership.add("asset", entity)
        context.emit(ownership)

    # Subsidiaries: each becomes a Company owned by this entity.
    for name in parse_companies(context, row.pop("Entity_Subsidiaries", None)):
        subsidiary = context.make("Company")
        subsidiary.id = context.make_slug("named", name)
        subsidiary.add("name", name)
        context.emit(subsidiary)

        ownership = context.make("Ownership")
        ownership.id = context.make_id(entity.id, "owns", subsidiary.id)
        ownership.add("owner", entity)
        ownership.add("asset", subsidiary)
        context.emit(ownership)

    # Any status other than "A" is only logged; processing continues.
    grp_status = row.pop("GrpStatus", None)
    if grp_status != "A":
        context.log.warning("Unknown GrpStatus", value=grp_status)

    entity.add("notes", h.clean_note(row.pop("OtherInformation", None)))
    h.audit_data(row, ignore=["NonLatinScriptLanguage", "NonLatinScriptType"])

    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    context.emit(sanction)
Пример #17
0
def parse_row(context, row):
    """Convert one source row into an entity plus its sanction and emit both.

    Column values are consumed from ``row`` with ``pop`` as they are mapped;
    anything still left in the row at the end is pretty-printed. Several
    columns (``GroupTypeDescription``, ``GroupID``, ``RegimeName``,
    ``DateListed``, ``LastUpdated``, ``AliasTypeName``, ``AliasType``) are
    popped without a default and are therefore required.

    Returns ``None``; rows whose group type is not recognised are logged and
    skipped without emitting anything.
    """
    # Pick the schema: individuals are Persons, rows carrying a vessel type
    # are Vessels, and other entities are looked up by their organisation
    # type (with "Organization" passed as the third lookup argument,
    # presumably the default -- confirm against ``context.lookup_value``).
    group_type = row.pop("GroupTypeDescription")
    org_type = row.pop("OrgType", None)
    if group_type == "Individual":
        base_schema = "Person"
    elif row.get("TypeOfVessel") is not None:
        base_schema = "Vessel"
    elif group_type == "Entity":
        base_schema = context.lookup_value("org_type", org_type,
                                           "Organization")
    else:
        context.log.error("Unknown entity type", group_type=group_type)
        return
    entity = context.make(base_schema)
    entity.id = context.make_slug(row.pop("GroupID"))
    if org_type is not None:
        org_types = split_items(org_type)
        entity.add_cast("LegalEntity", "legalForm", org_types)

    sanction = h.make_sanction(context, entity)
    # entity.add("position", row.pop("Position"), quiet=True)
    entity.add("notes", row.pop("OtherInformation", None), quiet=True)
    entity.add("notes",
               row.pop("FurtherIdentifiyingInformation", None),
               quiet=True)

    sanction.add("program", row.pop("RegimeName"))
    sanction.add("authority", row.pop("ListingType", None))
    sanction.add("startDate", h.parse_date(row.pop("DateListed"), FORMATS))
    sanction.add("recordId", row.pop("FCOId", None))
    sanction.add("status", row.pop("GroupStatus", None))
    sanction.add("reason", row.pop("UKStatementOfReasons", None))

    # The last-updated date is stored both as a property and in the
    # ``context`` mapping of the sanction and the entity.
    last_updated = h.parse_date(row.pop("LastUpdated"), FORMATS)
    if last_updated is not None:
        sanction.add("modifiedAt", last_updated)
        sanction.context["updated_at"] = last_updated
        entity.add("modifiedAt", last_updated)
        entity.context["updated_at"] = last_updated

    # DoB is sometimes a year only
    # The combined column is discarded; the date is built from its parts.
    row.pop("DateOfBirth", None)
    dob = parse_parts(
        row.pop("YearOfBirth", 0),
        row.pop("MonthOfBirth", 0),
        row.pop("DayOfBirth", 0),
    )
    entity.add_cast("Person", "birthDate", dob)

    gender = h.clean_gender(row.pop("Gender", None))
    entity.add_cast("Person", "gender", gender)
    id_number = row.pop("NationalIdNumber", None)
    entity.add_cast("LegalEntity", "idNumber", split_items(id_number))
    passport = row.pop("PassportDetails", None)
    entity.add_cast("Person", "passportNumber", split_items(passport))

    # Vessel details. The vessel length is consumed but not mapped.
    flag = row.pop("FlagOfVessel", None)
    entity.add_cast("Vessel", "flag", flag)

    prev_flag = row.pop("PreviousFlags", None)
    entity.add_cast("Vessel", "pastFlags", prev_flag)

    year = row.pop("YearBuilt", None)
    entity.add_cast("Vehicle", "buildDate", year)

    type_ = row.pop("TypeOfVessel", None)
    entity.add_cast("Vehicle", "type", type_)

    imo = row.pop("IMONumber", None)
    entity.add_cast("Vessel", "imoNumber", imo)

    tonnage = row.pop("TonnageOfVessel", None)
    entity.add_cast("Vessel", "tonnage", tonnage)
    row.pop("LengthOfVessel", None)

    # Name parts: name1 is the first name, name2 the second name, name3 to
    # name5 are middle names and Name6 is the last name.
    # entity.add("legalForm", org_type)
    title = split_items(row.pop("NameTitle", None))
    entity.add("title", title, quiet=True)
    entity.add("firstName", row.pop("name1", None), quiet=True)
    entity.add("secondName", row.pop("name2", None), quiet=True)
    entity.add("middleName", row.pop("name3", None), quiet=True)
    entity.add("middleName", row.pop("name4", None), quiet=True)
    entity.add("middleName", row.pop("name5", None), quiet=True)
    name6 = row.pop("Name6", None)
    entity.add("lastName", name6, quiet=True)
    # Without a FullName column, Name6 stands in for the full name. Rows
    # marked "AKA" contribute an alias, all others the main name.
    full_name = row.pop("FullName", name6)
    row.pop("AliasTypeName")
    if row.pop("AliasType") == "AKA":
        entity.add("alias", full_name)
    else:
        entity.add("name", full_name)

    nationalities = parse_countries(row.pop("Nationality", None))
    entity.add("nationality", nationalities, quiet=True)
    position = split_items(row.pop("Position", None))
    entity.add("position", position, quiet=True)

    # Both the country of birth and the listed country feed "country".
    birth_countries = parse_countries(row.pop("CountryOfBirth", None))
    entity.add("country", birth_countries, quiet=True)

    countries = parse_countries(row.pop("Country", None))
    entity.add("country", countries)
    pob = split_items(row.pop("TownOfBirth", None))
    entity.add("birthPlace", pob, quiet=True)

    address = h.make_address(
        context,
        full=row.pop("FullAddress", None),
        street=row.pop("address1", None),
        street2=row.pop("address2", None),
        street3=row.pop("address3", None),
        city=row.pop("address4", None),
        place=row.pop("address5", None),
        region=row.pop("address6", None),
        postal_code=row.pop("PostCode", None),
        country=first(countries),
    )
    h.apply_address(context, entity, address)

    reg_number = row.pop("BusinessRegNumber", None)
    entity.add_cast("LegalEntity", "registrationNumber", reg_number)

    phones = split_items(row.pop("PhoneNumber", None), comma=True)
    phones = h.clean_phones(phones)
    entity.add_cast("LegalEntity", "phone", phones)

    website = split_items(row.pop("Website", None), comma=True)
    entity.add_cast("LegalEntity", "website", website)

    emails = split_items(row.pop("EmailAddress", None), comma=True)
    emails = h.clean_emails(emails)
    entity.add_cast("LegalEntity", "email", emails)

    # Relationship columns are consumed but not turned into entities yet.
    # TODO: graph
    row.pop("Subsidiaries", None)
    row.pop("ParentCompany", None)
    row.pop("CurrentOwners", None)

    # Remaining known columns are dropped so that only unexpected ones are
    # left for the check below. (The second "DateListedDay" pop is a no-op,
    # the key has already been removed.)
    row.pop("DateListedDay", None)
    row.pop("DateListedMonth", None)
    row.pop("DateListedYear", None)
    row.pop("LastUpdatedDay", None)
    row.pop("LastUpdatedMonth", None)
    row.pop("LastUpdatedYear", None)
    row.pop("GrpStatus", None)
    row.pop("ID", None)
    row.pop("DateOfBirthId", None)
    row.pop("DateListedDay", None)
    # Print whatever was not consumed, to surface unhandled columns.
    if len(row):
        pprint(row)

    entity.add("topics", "sanction")
    context.emit(entity, target=True, unique=True)
    context.emit(sanction)