def bulk_write(collection, items):
    """Write a set of entities - given as raw dicts - to the index in bulk
    mode. This will perform validation but is dangerous as it means the
    application has no control over key generation and a few other aspects
    of building the entity.
    """
    # Buffer proxies keyed by ID so duplicates within a page are merged.
    buffered = {}
    for raw in items:
        if not is_mapping(raw):
            raise InvalidData("Failed to read input data")
        proxy = model.get_proxy(raw)
        if proxy.id is None:
            raise InvalidData("No ID for entity")
        existing = buffered.get(proxy.id)
        if existing is not None:
            existing.merge(proxy)
        else:
            buffered[proxy.id] = proxy
        # Flush a full page to the index and start a fresh buffer.
        if len(buffered) >= BULK_PAGE:
            index.index_bulk(collection.id, buffered)
            buffered = {}
    # Flush whatever remains after the final page.
    if buffered:
        index.index_bulk(collection.id, buffered)
def bulk_write(collection, entities, unsafe=False, role_id=None, index=True):
    """Write a set of entities - given as dicts - to the index."""
    # This is called mainly by the /api/2/collections/X/_bulk API.
    timestamp = datetime.utcnow().isoformat()
    aggregator = get_aggregator(collection)
    writer = aggregator.bulk()
    seen_ids = set()
    for record in entities:
        if not is_mapping(record):
            raise InvalidData("Failed to read input data", errors=record)
        proxy = model.get_proxy(record)
        if proxy.id is None:
            raise InvalidData("No ID for entity", errors=proxy.to_dict())
        proxy = collection.ns.apply(proxy)
        if not unsafe:
            # Strip file content hashes unless the caller is trusted.
            proxy = remove_checksums(proxy)
        proxy.context = {
            'role_id': role_id,
            'created_at': timestamp,
            'updated_at': timestamp,
        }
        writer.put(proxy, origin='bulk')
        # Collect IDs for selective indexing; once MAX_PAGE is reached we
        # stop collecting and re-index everything below.
        if index and len(seen_ids) < MAX_PAGE:
            seen_ids.add(proxy.id)
    writer.flush()
    if index:
        if len(seen_ids) >= MAX_PAGE:
            seen_ids = None
        index_aggregator(collection, aggregator, entity_ids=seen_ids)
        refresh_collection(collection.id)
def bulk_write(collection, items, merge=True):
    """Write a set of entities - given as dicts - to the index in bulk
    mode. This will perform validation but is dangerous as it means the
    application has no control over key generation and a few other
    aspects of building the entity.
    """
    namespace = Namespace(collection.foreign_id)
    buffered = {}
    for raw in items:
        if not is_mapping(raw):
            raise InvalidData("Failed to read input data", errors=raw)
        proxy = namespace.apply(model.get_proxy(raw))
        proxy.context = {
            'bulk': True,
            'collection_id': collection.id
        }
        if proxy.id is None:
            raise InvalidData("No ID for entity", errors=raw)
        existing = buffered.get(proxy.id)
        if existing is None:
            buffered[proxy.id] = proxy
        else:
            # Duplicate ID within the batch: merge into the buffered proxy.
            existing.merge(proxy)
        if len(buffered) >= BULK_PAGE:
            index.index_bulk(collection.id, buffered, merge=merge)
            buffered = {}
    # Flush the final, partial page.
    if buffered:
        index.index_bulk(collection.id, buffered, merge=merge)
    refresh_collection(collection)
def add(self, prop, values, cleaned=False, quiet=False):
    """Add the given value(s) to the property if they are not empty."""
    prop = self._get_prop(prop, quiet=quiet)
    if prop is None:
        return
    # Reverse (stub) properties cannot be written to directly:
    if prop.stub:
        if quiet:
            return
        msg = gettext("Stub property (%s): %s")
        raise InvalidData(msg % (self.schema, prop))
    for raw in ensure_list(values):
        value = raw
        if not cleaned:
            value = prop.type.clean(value, countries=self.countries)
        if value is None or not isinstance(value, Hashable):
            continue
        if prop.type == registry.entity and value == self.id:
            msg = gettext("Self-relationship (%s): %s")
            raise InvalidData(msg % (self.schema, prop))
        # Somewhat hacky: limit the maximum size of any particular
        # field to avoid overloading upstream aleph/elasticsearch.
        value_size = prop.type.values_size(value)
        limit = prop.type.max_size
        if limit is not None and self._size + value_size > limit:
            msg = "[%s] too large. Rejecting additional values."
            log.warning(msg, prop.name)
            continue
        self._size += value_size
        self._properties.setdefault(prop, set()).add(value)
def add(self, prop, values, cleaned=False, quiet=False, fuzzy=False):
    """Add the given value(s) to the property if they are not empty."""
    key = self._prop_name(prop, quiet=quiet)
    if key is None:
        return
    prop = self.schema.properties[key]
    # Reverse (stub) properties cannot be written to directly:
    if prop.stub:
        if quiet:
            return
        msg = gettext("Stub property (%s): %s")
        raise InvalidData(msg % (self.schema, prop))
    for raw in value_list(values):
        value = raw if cleaned else prop.type.clean(raw, proxy=self, fuzzy=fuzzy)
        if value is None:
            continue
        if prop.type == registry.entity and value == self.id:
            msg = gettext("Self-relationship (%s): %s")
            raise InvalidData(msg % (self.schema, prop))
        # Somewhat hacky: limit the maximum size of any particular
        # field to avoid overloading upstream aleph/elasticsearch.
        length = len(value)
        limit = prop.type.max_size
        if limit is not None and self._size + length > limit:
            # Silently reject values once the field budget is exhausted.
            continue
        self._size += length
        self._properties.setdefault(key, set()).add(value)
def _generate():
    # Validate and pre-process each incoming record before indexing.
    # NOTE(review): `entities`, `unsafe` and `_process_entity` come from
    # the enclosing scope.
    for record in entities:
        if not is_mapping(record):
            raise InvalidData("Failed to read input data", errors=record)
        proxy = model.get_proxy(record)
        if proxy.id is None:
            raise InvalidData("No ID for entity", errors=proxy.to_dict())
        if not unsafe:
            proxy = remove_checksums(proxy)
        yield _process_entity(proxy)
def precise_schema(self, left, right):
    """Select the most narrow of two schemata.

    When indexing data from a dataset, an entity may be declared as a
    LegalEntity in one query, and as a Person in another. This function
    will select the most specific of two schemata offered. In the
    example, that would be Person.

    Raises ``InvalidData`` if the two schemata share no common ancestor.
    """
    if left == right:
        return left
    lefts = self.get(left)
    if lefts is None:
        return right
    if right in lefts.names:
        return left
    rights = self.get(right)
    if rights is None:
        return left
    if left in rights.names:
        return right
    # Find a common ancestor. Bug fix: the loop variables previously
    # shadowed `left`/`right`, corrupting the error message below when
    # no ancestor was found.
    for left_name in lefts.names:
        for right_name in rights.names:
            if left_name == right_name:
                return left_name
    raise InvalidData("No common ancestor: %s and %s" % (left, right))
def bulk_write(collection, entities, safe=False, role_id=None, mutable=True, index=True):
    """Write a set of entities - given as dicts - to the index."""
    # This is called mainly by the /api/2/collections/X/_bulk API.
    aggregator = get_aggregator(collection)
    writer = aggregator.bulk()
    seen_ids = set()
    for record in entities:
        proxy = model.get_proxy(record, cleaned=False)
        if proxy.id is None:
            raise InvalidData("No ID for entity", errors=proxy.to_dict())
        proxy = collection.ns.apply(proxy)
        if safe:
            proxy = remove_checksums(proxy)
        proxy.context = {"role_id": role_id, "mutable": mutable}
        # Preserve caller-supplied timestamps when they parse as dates.
        for field in ("created_at", "updated_at"):
            raw_ts = record.get(field)
            if raw_ts is None:
                continue
            parsed = registry.date.to_datetime(raw_ts)
            if parsed is not None:
                proxy.context[field] = parsed.isoformat()
        writer.put(proxy, origin="bulk")
        # Collect IDs for selective indexing up to MAX_PAGE.
        if index and len(seen_ids) < MAX_PAGE:
            seen_ids.add(proxy.id)
    writer.flush()
    if index:
        # Too many IDs to enumerate: re-index the whole aggregator.
        if len(seen_ids) >= MAX_PAGE:
            seen_ids = None
        index_aggregator(collection, aggregator, entity_ids=seen_ids)
        refresh_collection(collection.id)
def from_dict(cls, model, data):
    """Build a proxy from a plain dict, validating its schema."""
    # Objects that are already proxies pass straight through.
    if isinstance(data, cls):
        return data
    schema = model.get(data.get('schema'))
    if schema is None:
        raise InvalidData(gettext('No schema for entity.'))
    entity_id = data.get('id')
    properties = data.get('properties')
    return cls(schema, entity_id, properties)
def common_schema(self, left, right):
    """Select the most narrow of two schemata.

    When indexing data from a dataset, an entity may be declared as a
    LegalEntity in one query, and as a Person in another. This function
    will select the most specific of two schemata offered. In the
    example, that would be Person.

    Raises ``InvalidData`` for unknown schemata or when no common
    ancestor exists.
    """
    # Resolve names to schema objects, each falling back to the other.
    left_schema = self.get(left) or self.get(right)
    right_schema = self.get(right) or self.get(left)
    if left_schema is None or right_schema is None:
        # Bug fix: previously this fell through and crashed with an
        # AttributeError on None instead of raising InvalidData.
        raise InvalidData("Invalid schema: %s and %s" % (left, right))
    left_schemata = list(left_schema.schemata)
    right_schemata = list(right_schema.schemata)
    if right_schema in left_schemata:
        return left_schema
    if left_schema in right_schemata:
        return right_schema
    # Find a common ancestor (without shadowing the arguments, which are
    # needed intact for the error message):
    for candidate in left_schemata:
        if candidate in right_schemata:
            return candidate
    msg = "No common ancestor: %s and %s"
    raise InvalidData(msg % (left, right))
def __init__(self, model, data, key_prefix=None, cleaned=True):
    """Build an entity proxy from a raw data mapping."""
    data = dict(data)
    properties = data.pop("properties", {})
    if not cleaned:
        properties = ensure_dict(properties)
    self.schema = model.get(data.pop("schema", None))
    if self.schema is None:
        raise InvalidData(gettext("No schema for entity."))
    self.key_prefix = key_prefix
    self.id = data.pop("id", None)
    if not cleaned:
        self.id = sanitize_text(self.id)
    # Whatever remains of the mapping is retained as entity context.
    self.context = data
    self._properties = {}
    self._size = 0
    for key, value in properties.items():
        if key not in self.schema.properties:
            continue
        if cleaned:
            # Trusted input: adopt the values directly, tracking size.
            values = set(value)
            self._properties[key] = values
            self._size += sum(len(v) for v in values)
        else:
            self.add(key, value, cleaned=cleaned, quiet=True)
def add_schema(self, schema: Union[str, Schema]) -> None:
    """Try to apply the given schema to the current entity, making it more
    specific (e.g. turning a `LegalEntity` into a `Company`). This raises
    an exception if the current and new type are incompatible."""
    try:
        merged = model.common_schema(self.schema, schema)
    except InvalidData as exc:
        # Prefix the entity ID so the failure can be traced to its source.
        raise InvalidData(f"{self.id}: {exc}") from exc
    self.schema = merged
def create(cls, data, collection, validate=True):
    """Create a new entity record within the given collection."""
    entity = cls()
    raw_id = data.get('id') or make_textid()
    if not registry.entity.validate(raw_id):
        raise InvalidData(gettext("Invalid entity ID"))
    # Sign the ID into the collection's namespace before storing.
    entity.id = collection.ns.sign(raw_id)
    entity.collection_id = collection.id
    entity.update(data, collection, validate=validate)
    return entity
def _get_prop(self, prop, quiet=False):
    """Resolve a property name or instance to a Property object."""
    # Property instances pass straight through.
    if isinstance(prop, Property):
        return prop
    if prop in self.schema.properties:
        return self.schema.get(prop)
    if quiet:
        return
    msg = gettext("Unknown property (%s): %s")
    raise InvalidData(msg % (self.schema, prop))
def add(self, prop, values, cleaned=False, quiet=False, fuzzy=False):
    """Add the given value(s) to the property if they are valid for
    the type of the property.

    :param prop: can be given as a name or an instance of
        :class:`~followthemoney.property.Property`.
    :param values: either a single value, or a list of values to be added.
    :param cleaned: should the data be normalised before adding it.
    :param quiet: a reference to an non-existent property will return
        an empty list instead of raising an error.
    :param fuzzy: when normalising the data, should fuzzy matching
        be allowed.
    """
    name = self._prop_name(prop, quiet=quiet)
    if name is None:
        return
    prop = self.schema.properties[name]
    # Reverse (stub) properties cannot be written to directly:
    if prop.stub:
        if quiet:
            return
        msg = gettext("Stub property (%s): %s")
        raise InvalidData(msg % (self.schema, prop))
    for raw in value_list(values):
        value = raw if cleaned else prop.type.clean(raw, proxy=self, fuzzy=fuzzy)
        if value is None:
            continue
        if prop.type == registry.entity and value == self.id:
            msg = gettext("Self-relationship (%s): %s")
            raise InvalidData(msg % (self.schema, prop))
        # Somewhat hacky: limit the maximum size of any particular
        # field to avoid overloading upstream aleph/elasticsearch.
        size = len(value)
        limit = prop.type.max_size
        if limit is not None and self._size + size > limit:
            # Silently drop values once the field budget is used up.
            continue
        self._size += size
        self._properties.setdefault(name, set()).add(value)
def _process_entity(entity, sync=False):
    """Perform pre-index processing on an entity, includes running the
    NLP pipeline."""
    if entity.id is None:
        raise InvalidData("No ID for entity", errors=entity.to_dict())
    tag_entity(entity)
    if sync:
        # Make the entity immediately visible to subsequent reads.
        refresh_entity_id(entity.id)
    return entity
def create(cls, data, collection, role_id=None):
    """Create a new entity record in the collection, owned by a role."""
    entity = cls()
    raw_id = data.get("id") or make_textid()
    if not registry.entity.validate(raw_id):
        raise InvalidData(gettext("Invalid entity ID"))
    # Sign the ID into the collection's namespace before storing.
    entity.id = collection.ns.sign(raw_id)
    entity.collection_id = collection.id
    entity.role_id = role_id
    entity.update(data, collection)
    return entity
def validate_entity(data):
    """Check that there is a valid schema and all FtM conform to it."""
    schema = model.get(data.get("schema"))
    if schema is None:
        raise InvalidData(gettext("No schema on entity"))
    # Not strictly required: the proxy keeps only values that are valid
    # for each property, making it valid by construction. This exists
    # purely to surface an exception that notifies the user.
    schema.validate(data)
def merge(self, other):
    """Merge another entity into this one, upgrading to a common schema."""
    model = self.schema.model
    other = self.from_dict(model, other)
    self.id = self.id or other.id
    try:
        self.schema = model.common_schema(self.schema, other.schema)
    except InvalidData as exc:
        detail = "Cannot merge entities with id %s: %s" % (self.id, exc)
        raise InvalidData(detail)
    self.context.update(other.context)
    # De-duplicate (prop, value) pairs before re-adding them.
    for prop, value in set(other.itervalues()):
        self.add(prop, value, cleaned=True, quiet=True)
def _prop_name(self, prop, quiet=False): # This is pretty unwound because it gets called a *lot*. if prop in self.schema.properties: return prop try: if prop.name in self.schema.properties: return prop.name except AttributeError: pass if quiet: return msg = gettext("Unknown property (%s): %s") raise InvalidData(msg % (self.schema, prop))
def merge(self, other):
    """Merge another proxy into this one and return self."""
    model = self.schema.model
    self.id = self.id or other.id
    try:
        self.schema = model.common_schema(self.schema, other.schema)
    except InvalidData as exc:
        detail = "Cannot merge entities with id %s: %s" % (self.id, exc)
        raise InvalidData(detail)
    self.context = merge_context(self.context, other.context)
    # Re-add the other proxy's values so size limits still apply.
    for prop, values in other._properties.items():
        self.add(prop, values, cleaned=True, quiet=True)
    return self
def validate(self, data):
    """Validate a dataset against the given schema.
    This will also drop keys which are not present as properties.
    """
    errors = {}
    properties = ensure_dict(data.get('properties'))
    for name, prop in self.properties.items():
        error = prop.validate(properties.get(name))
        if error is not None:
            errors[name] = error
    if errors:
        raise InvalidData({'properties': errors})
def common_schema(self, left: Union[str, Schema], right: Union[str, Schema]) -> Schema:
    """Select the most narrow of two schemata.

    When indexing data from a dataset, an entity may be declared as a
    LegalEntity in one query, and as a Person in another. This function
    will select the most specific of two schemata offered. In the
    example, that would be Person.
    """
    # Resolve names to schema objects, each falling back to the other:
    resolved_left = self.get(left) or self.get(right)
    resolved_right = self.get(right) or self.get(left)
    if resolved_left is None or resolved_right is None:
        raise InvalidData("Invalid schema")
    # Whichever schema is a subtype of the other is the narrower one.
    for narrow, wide in ((resolved_left, resolved_right), (resolved_right, resolved_left)):
        if narrow.is_a(wide):
            return narrow
    msg = "No common schema: %s and %s"
    raise InvalidData(msg % (left, right))
def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
    """Resolve a property reference (name or Property) to its name."""
    # Kept unwound for speed; this is on a very hot path.
    if prop in self.schema.properties:
        return cast(str, prop)
    try:
        obj = cast(Property, prop)
        name = obj.name
    except AttributeError:
        name = None
    if name is not None and name in self.schema.properties:
        return name
    if quiet:
        return None
    msg = gettext("Unknown property (%s): %s")
    raise InvalidData(msg % (self.schema, prop))
def __init__(
    self,
    type_: PropertyType,
    value: str,
    proxy: Optional[EntityProxy] = None,
    schema: Optional[Schema] = None,
) -> None:
    """Create a graph node for the given type/value pair."""
    self.type = type_
    self.value = value
    node_id = type_.node_id_safe(value)
    if node_id is None:
        raise InvalidData("No ID for node")
    self.id = node_id
    self.proxy = proxy
    # Prefer the proxy's schema when a proxy is supplied.
    if proxy is None:
        self.schema = schema
    else:
        self.schema = proxy.schema
def __init__(self, model, data, key_prefix=None):
    """Instantiate an entity proxy from a raw dict."""
    data = dict(data)
    properties = ensure_dict(data.pop('properties', {}))
    self.schema = model.get(data.pop('schema', None))
    if self.schema is None:
        raise InvalidData(gettext('No schema for entity.'))
    self.id = sanitize_text(data.pop('id', None))
    self.key_prefix = sanitize_text(key_prefix)
    # Whatever remains of the mapping is kept as entity context.
    self.context = data
    self._properties = {}
    self._size = 0
    if is_mapping(properties):
        for key, value in properties.items():
            self.add(key, value, cleaned=True, quiet=True)
def __init__(
    self,
    model: "Model",
    data: Dict[str, Any],
    key_prefix: Optional[str] = None,
    cleaned: bool = True,
):
    data = dict(data or {})
    properties = data.pop("properties", {})
    if not cleaned:
        properties = ensure_dict(properties)
    #: The schema definition for this entity, which implies the properties
    #: That can be set on it.
    schema = model.get(data.pop("schema", None))
    if schema is None:
        raise InvalidData(gettext("No schema for entity."))
    self.schema = schema
    #: When using :meth:`~make_id` to generate a natural key for this entity,
    #: the prefix will be added to the ID as a salt to make it easier to keep
    #: IDs unique across datasets. This is somewhat redundant following the
    #: introduction of :class:`~followthemoney.namespace.Namespace`.
    self.key_prefix = key_prefix
    #: A unique identifier for this entity, usually a hashed natural key,
    #: a UUID, or a very simple slug. Can be signed using a
    #: :class:`~followthemoney.namespace.Namespace`.
    self.id = data.pop("id", None)
    if not cleaned:
        self.id = sanitize_text(self.id)
    #: If the input dictionary for the entity proxy contains fields other
    #: than ``id``, ``schema`` or ``properties``, they will be kept in here
    #: and re-added upon serialization.
    self.context = data
    self._properties: Dict[str, Set[str]] = {}
    self._size = 0
    for key, value in properties.items():
        if key not in self.schema.properties:
            continue
        if cleaned:
            # Trusted input: adopt the values directly, tracking size.
            values = set(value)
            self._properties[key] = values
            self._size += sum(len(v) for v in values)
        else:
            self.add(key, value, cleaned=cleaned, quiet=True)
def _normalize_data(data):
    """Turn entities in properties into entity ids.

    Walks every entity in ``data['layout']['entities']`` and, for each
    entity-typed property, replaces its values with bare entity IDs.
    Raises ``InvalidData`` if an entity carries an unknown schema.
    """
    entities = data['layout']['entities']
    for obj in entities:
        schema = model.get(obj.get('schema'))
        if schema is None:
            raise InvalidData("Invalid schema %s" % obj.get('schema'))
        properties = obj.get('properties', {})
        for name, values in list(properties.items()):
            prop = schema.get(name)
            # Bug fix: a property name unknown to the schema returned None
            # here, which then crashed with an AttributeError on
            # `prop.type`. Skip unknown properties instead.
            if prop is None:
                continue
            if prop.type == registry.entity:
                properties[prop.name] = [
                    get_entity_id(value) for value in ensure_list(values)
                ]
    return data
def clean_text(
    self,
    text: str,
    fuzzy: bool = False,
    format: Optional[str] = None,
    proxy: Optional["EntityProxy"] = None,
) -> Optional[str]:
    """Specific types can apply their own cleaning routines here (this
    is called by ``clean`` after the value has been converted to a
    string and null values have been filtered)."""
    if proxy is not None and text == proxy.id:
        msg = gettext("Self-relationship (%s): %s")
        raise InvalidData(msg % (proxy.schema, text))
    # Only values matching the type's pattern survive cleaning.
    if self.REGEX.match(text) is None:
        return None
    return text
def merge(self, other: "EntityProxy") -> "EntityProxy":
    """Merge another entity proxy into this one. This will try and find
    the common schema between both entities and then add all property
    values from the other entity into this one."""
    model = self.schema.model
    self.id = self.id or other.id
    try:
        self.schema = model.common_schema(self.schema, other.schema)
    except InvalidData as exc:
        detail = "Cannot merge entities with id %s: %s" % (self.id, exc)
        raise InvalidData(detail)
    self.context = merge_context(self.context, other.context)
    # Re-add the other proxy's values so per-field limits still apply.
    for prop, values in other._properties.items():
        self.add(prop, values, cleaned=True, quiet=True)
    return self