示例#1
0
 def to_dict(self):
     """Describe this type as a plain dictionary.

     ``label`` and ``plural`` are always present (passed through
     ``gettext``); ``group`` and ``matchable`` are only included when
     they are truthy.
     """
     payload = dict(label=gettext(self.label), plural=gettext(self.plural))
     group = self.group
     if group:
         payload['group'] = group
     if self.matchable:
         payload.update(matchable=True)
     return payload
示例#2
0
    def add(self, prop, values, cleaned=False, quiet=False, fuzzy=False):
        """Add the given value(s) to the property if they are not empty.

        :param prop: the property reference, resolved via ``_prop_name``.
        :param values: one or more values, expanded with ``value_list``.
        :param cleaned: when true the values are stored as given; otherwise
            each one first goes through the property type's ``clean``.
        :param quiet: forwarded to ``_prop_name``; also turns a write to a
            stub property into a silent no-op instead of an error.
        :param fuzzy: forwarded to the property type's ``clean``.
        :raises InvalidData: for a stub property (unless ``quiet``) or when
            an entity-typed value equals this proxy's own ID.
        """
        name = self._prop_name(prop, quiet=quiet)
        if name is None:
            return
        prop = self.schema.properties[name]

        # Stub (reverse) properties must not be written to.
        if prop.stub:
            if not quiet:
                msg = gettext("Stub property (%s): %s")
                raise InvalidData(msg % (self.schema, prop))
            return

        for raw in value_list(values):
            value = raw if cleaned else prop.type.clean(raw, proxy=self, fuzzy=fuzzy)
            if value is None:
                continue
            if prop.type == registry.entity and value == self.id:
                msg = gettext("Self-relationship (%s): %s")
                raise InvalidData(msg % (self.schema, prop))

            # Somewhat hacky size cap to avoid overloading upstream
            # aleph/elasticsearch: values that would push the running total
            # past the type's ``max_size`` are skipped without any warning.
            size = len(value)
            limit = prop.type.max_size
            if limit is not None and self._size + size > limit:
                continue
            self._size += size
            self._properties.setdefault(name, set()).add(value)
示例#3
0
    def add(self, prop, values, cleaned=False, quiet=False):
        """Add the given value(s) to the property if they are not empty.

        :param prop: the property reference, resolved via ``_get_prop``.
        :param values: one or more values, expanded with ``ensure_list``.
        :param cleaned: when true the values are stored as given; otherwise
            each one first goes through the property type's ``clean``.
        :param quiet: forwarded to ``_get_prop``; also turns a write to a
            stub property into a silent no-op instead of an error.
        :raises InvalidData: for a stub property (unless ``quiet``) or when
            an entity-typed value equals this proxy's own ID.
        """
        prop = self._get_prop(prop, quiet=quiet)
        if prop is None:
            return

        # Stub (reverse) properties must not be written to.
        if prop.stub:
            if not quiet:
                msg = gettext("Stub property (%s): %s")
                raise InvalidData(msg % (self.schema, prop))
            return

        for raw in ensure_list(values):
            value = raw if cleaned else prop.type.clean(raw, countries=self.countries)
            # Values end up in a set, so unhashable ones are dropped here.
            if value is None or not isinstance(value, Hashable):
                continue
            if prop.type == registry.entity and value == self.id:
                msg = gettext("Self-relationship (%s): %s")
                raise InvalidData(msg % (self.schema, prop))

            # Somewhat hacky size cap to avoid overloading upstream
            # aleph/elasticsearch.
            size = prop.type.values_size(value)
            limit = prop.type.max_size
            if limit is not None and self._size + size > limit:
                msg = "[%s] too large. Rejecting additional values."
                log.warning(msg, prop.name)
                continue
            self._size += size

            self._properties.setdefault(prop, set()).add(value)
示例#4
0
 def to_dict(self):
     """Describe this type as a plain dictionary.

     ``label`` and ``plural`` are always present (passed through
     ``gettext``); ``group``, ``matchable`` and ``pivot`` are only
     included when they are truthy.
     """
     payload = {"label": gettext(self.label), "plural": gettext(self.plural)}
     group = self.group
     if group:
         payload["group"] = group
     # Boolean markers are emitted as ``True`` only when set.
     for flag in ("matchable", "pivot"):
         if getattr(self, flag):
             payload[flag] = True
     return payload
示例#5
0
 def to_dict(self) -> Dict[str, Any]:
     """Return a serialisable description of this data type.

     ``group``, ``matchable`` and ``pivot`` are left out when falsy.
     """
     result = {"label": gettext(self.label), "plural": gettext(self.plural)}
     group = self.group
     if group:
         result["group"] = group
     enabled = {flag: True for flag in ("matchable", "pivot") if getattr(self, flag)}
     result.update(enabled)
     return result
示例#6
0
 def to_dict(self) -> PropertyTypeToDict:
     """Return a serialisable description of this data type.

     ``group``, ``matchable`` and ``pivot`` are left out when falsy.
     """
     label, plural = gettext(self.label), gettext(self.plural)
     summary: PropertyTypeToDict = {"label": label, "plural": plural}
     group = self.group
     if group:
         summary["group"] = group
     # Keys are spelled out literally so the typed dict stays checkable.
     if self.matchable:
         summary["matchable"] = True
     if self.pivot:
         summary["pivot"] = True
     return summary
示例#7
0
    def validate(self, data):
        """Validate that the data should be stored.

        Since the types system doesn't really have validation, this currently
        tries to normalize the value to see if it passes strict parsing.

        :param data: an iterable of values supplied for this property.
        :return: an error message for the first problem found, or ``None``
            when every value is accepted.
        """
        for val in data:
            # Checked per value, so empty input on a stub property passes.
            if self.stub:
                return gettext('Property cannot be written')
            # presumably reduces an entity reference to its ID -- confirm
            # against ``get_entity_id``.
            val = get_entity_id(val)
            if not self.type.validate(val):
                return gettext('Invalid value')
        return None
示例#8
0
    def __init__(self, model, data, key_prefix=None, cleaned=True):
        """Build a proxy from an entity dictionary.

        :param model: object whose ``get`` resolves the ``schema`` entry.
        :param data: mapping with ``schema``, ``id`` and ``properties``;
            any remaining keys are kept in ``context``.
        :param key_prefix: stored unchanged on the proxy.
        :param cleaned: when true the properties are trusted and copied in
            directly; otherwise they are normalised through ``add``.
        :raises InvalidData: if the schema cannot be resolved.
        """
        data = dict(data)
        raw_properties = data.pop("properties", {})
        if not cleaned:
            raw_properties = ensure_dict(raw_properties)
        self.schema = model.get(data.pop("schema", None))
        if self.schema is None:
            raise InvalidData(gettext("No schema for entity."))
        self.key_prefix = key_prefix
        entity_id = data.pop("id", None)
        self.id = entity_id if cleaned else sanitize_text(entity_id)
        # Whatever is left after popping the known keys.
        self.context = data
        self._properties = {}
        self._size = 0

        for name, values in raw_properties.items():
            # Properties the schema does not define are ignored.
            if name not in self.schema.properties:
                continue
            if cleaned:
                unique = set(values)
                self._properties[name] = unique
                self._size += sum(map(len, unique))
            else:
                self.add(name, values, cleaned=cleaned, quiet=True)
示例#9
0
    def validate(self, data):
        """Check whether the given value(s) may be stored on this property.

        The type system has no dedicated validation step, so each value is
        handed to the type's ``validate``. Mapping values are reduced to
        their ``id`` entry first.

        :return: an error message, or ``None`` if the data is acceptable.
        """
        accepted = []
        for item in ensure_list(data):
            candidate = item.get('id') if is_mapping(item) else item
            if not self.type.validate(candidate):
                return gettext('Invalid value')
            if candidate is not None:
                accepted.append(candidate)
        if self.required and not accepted:
            return gettext('Required')
        return None
示例#10
0
 def from_dict(cls, model, data):
     """Build an instance of ``cls`` from an entity dictionary.

     Objects that already are instances of ``cls`` are returned as-is.

     :raises InvalidData: if ``model`` cannot resolve the ``schema`` entry.
     """
     if not isinstance(data, cls):
         schema = model.get(data.get('schema'))
         if schema is None:
             raise InvalidData(gettext('No schema for entity.'))
         return cls(schema, data.get('id'), data.get('properties'))
     return data
示例#11
0
 def validate(self, data):
     """Validate the ``properties`` of an entity dict against this schema.

     Only properties defined on the schema are checked. Each one is
     validated by its property object, and an empty value list for a
     property named in ``self.required`` is reported as "Required".

     :raises InvalidData: if any property fails, with the per-property
         messages under ``errors["properties"]``.
     """
     supplied = ensure_dict(data.get("properties"))
     problems = {}
     for name, prop in self.properties.items():
         values = ensure_list(supplied.get(name))
         message = prop.validate(values)
         if message is None and not values and prop.name in self.required:
             message = gettext("Required")
         if message is not None:
             problems[name] = message
     if problems:
         msg = gettext("Entity validation failed")
         raise InvalidData(msg, errors={"properties": problems})
示例#12
0
 def _get_prop(self, prop, quiet=False):
     """Resolve a property name or object to a ``Property``.

     ``Property`` instances are returned unchanged. A name not found in
     the schema yields ``None`` when ``quiet`` is set and raises
     ``InvalidData`` otherwise.
     """
     if isinstance(prop, Property):
         return prop
     if prop in self.schema.properties:
         return self.schema.get(prop)
     if not quiet:
         msg = gettext("Unknown property (%s): %s")
         raise InvalidData(msg % (self.schema, prop))
     return None
示例#13
0
 def validate(self, data: Any) -> Optional[str]:
     """Validate the ``properties`` of an entity dict against this schema.

     Only properties defined on the schema are checked. Each one is
     validated by its property object, and an empty value list for a
     property named in ``self.required`` is reported as "Required".

     :return: ``None`` when validation succeeds.
     :raises InvalidData: if any property fails, with the per-property
         messages under ``errors["properties"]``.
     """
     supplied = cast(Dict[str, Any], ensure_dict(data.get("properties")))
     problems = {}
     for name, prop in self.properties.items():
         values = ensure_list(supplied.get(name, []))
         message = prop.validate(values)
         if message is None and not values and prop.name in self.required:
             message = gettext("Required")
         if message is not None:
             problems[name] = message
     if problems:
         msg = gettext("Entity validation failed")
         raise InvalidData(msg, errors={"properties": problems})
     return None
示例#14
0
    def add(self, prop, values, cleaned=False, quiet=False, fuzzy=False):
        """Add the given value(s) to the property if they are valid for
        the type of the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: set this when the values are already normalised;
            otherwise each one is passed through the type's ``clean`` first.
        :param quiet: a reference to a non-existent or stub property is
            silently ignored instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :raises InvalidData: for a stub property (unless ``quiet``) or when
            an entity-typed value equals this proxy's own ID.
        """
        name = self._prop_name(prop, quiet=quiet)
        if name is None:
            return
        prop = self.schema.properties[name]

        # Stub (reverse) properties must not be written to.
        if prop.stub:
            if not quiet:
                msg = gettext("Stub property (%s): %s")
                raise InvalidData(msg % (self.schema, prop))
            return

        for raw in value_list(values):
            value = raw if cleaned else prop.type.clean(raw, proxy=self, fuzzy=fuzzy)
            if value is None:
                continue
            if prop.type == registry.entity and value == self.id:
                msg = gettext("Self-relationship (%s): %s")
                raise InvalidData(msg % (self.schema, prop))

            # Somewhat hacky size cap to avoid overloading upstream
            # aleph/elasticsearch: values that would push the running total
            # past the type's ``max_size`` are skipped without any warning.
            size = len(value)
            limit = prop.type.max_size
            if limit is not None and self._size + size > limit:
                continue
            self._size += size
            self._properties.setdefault(name, set()).add(value)
示例#15
0
    def validate(self, data):
        """Check whether the given value(s) may be stored on this property.

        The type system has no dedicated validation step, so each value is
        handed to the type's ``validate``. Dict values are reduced to their
        ``id`` entry first.

        :return: a ``(values, error)`` pair. On success ``values`` is the
            de-duplicated list of accepted values and ``error`` is ``None``;
            on failure ``values`` is the input as a list and ``error`` is
            the message.
        """
        accepted, error = [], None
        for item in ensure_list(data):
            candidate = item.get('id') if isinstance(item, dict) else item
            if self.type.validate(candidate):
                accepted.append(candidate)
            else:
                error = gettext('Invalid value')
        # A missing required value takes precedence over "Invalid value".
        if self.required and not accepted:
            error = gettext('Required')
        if error is None:
            return list(set(accepted)), None
        return ensure_list(data), error
示例#16
0
 def _prop_name(self, prop, quiet=False):
     """Resolve a property name or object to a name known to the schema.

     Returns ``None`` for an unknown property when ``quiet`` is set and
     raises ``InvalidData`` otherwise.
     """
     # Kept deliberately flat: this is called a *lot*.
     known = self.schema.properties
     if prop in known:
         return prop
     try:
         name = prop.name
         if name in known:
             return name
     except AttributeError:
         # Not a property-like object; fall through to the failure path.
         pass
     if not quiet:
         msg = gettext("Unknown property (%s): %s")
         raise InvalidData(msg % (self.schema, prop))
     return None
示例#17
0
 def validate(self, data):
     """Validate the ``properties`` of an entity dict against this schema.

     Only properties defined on the schema are checked, each one by its
     own property object.

     :raises InvalidData: if any property fails, with the per-property
         messages under ``errors['properties']``.
     """
     supplied = ensure_dict(data.get('properties'))
     problems = {}
     for name, prop in self.properties.items():
         outcome = prop.validate(supplied.get(name))
         if outcome is not None:
             problems[name] = outcome
     if problems:
         msg = gettext("Entity failed validation")
         raise InvalidData(msg, errors={'properties': problems})
示例#18
0
 def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
     """Resolve a property name or object to a name known to the schema.

     Returns ``None`` for an unknown property when ``quiet`` is set and
     raises ``InvalidData`` otherwise.
     """
     # Kept deliberately flat: this is called a *lot*.
     known = self.schema.properties
     if prop in known:
         return cast(str, prop)
     try:
         name = cast(Property, prop).name
         if name in known:
             return name
     except AttributeError:
         # Not a property-like object; fall through to the failure path.
         pass
     if not quiet:
         msg = gettext("Unknown property (%s): %s")
         raise InvalidData(msg % (self.schema, prop))
     return None
示例#19
0
    def __init__(self, model, data, key_prefix=None):
        """Build a proxy from an entity dictionary.

        :param model: object whose ``get`` resolves the ``schema`` entry.
        :param data: mapping with ``schema``, ``id`` and ``properties``;
            any remaining keys are kept in ``context``.
        :param key_prefix: passed through ``sanitize_text`` and stored.
        :raises InvalidData: if the schema cannot be resolved.
        """
        data = dict(data)
        raw_properties = ensure_dict(data.pop('properties', {}))
        self.schema = model.get(data.pop('schema', None))
        if self.schema is None:
            raise InvalidData(gettext('No schema for entity.'))
        self.id = sanitize_text(data.pop('id', None))
        self.key_prefix = sanitize_text(key_prefix)
        # Whatever is left after popping the known keys.
        self.context = data
        self._properties = {}
        self._size = 0

        # Values are handed to ``add`` as already cleaned; unknown or stub
        # properties are skipped quietly.
        entries = raw_properties.items() if is_mapping(raw_properties) else ()
        for name, values in entries:
            self.add(name, values, cleaned=True, quiet=True)
示例#20
0
    def __init__(
        self,
        model: "Model",
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
        cleaned: bool = True,
    ):
        """Build a proxy from an entity dictionary.

        With ``cleaned`` set the properties are trusted and copied in
        directly; otherwise they are normalised through :meth:`add`.

        :raises InvalidData: if the schema cannot be resolved.
        """
        data = dict(data or {})
        raw_properties = data.pop("properties", {})
        if not cleaned:
            raw_properties = ensure_dict(raw_properties)

        schema = model.get(data.pop("schema", None))
        if schema is None:
            raise InvalidData(gettext("No schema for entity."))
        #: The schema definition of this entity; it determines which
        #: properties can be set on it.
        self.schema = schema

        #: Prefix added as a salt when :meth:`~make_id` generates a natural
        #: key, which helps keep IDs unique across datasets. Somewhat
        #: redundant since the introduction of
        #: :class:`~followthemoney.namespace.Namespace`.
        self.key_prefix = key_prefix

        entity_id = data.pop("id", None)
        #: Unique identifier of this entity, usually a hashed natural key,
        #: a UUID, or a very simple slug. Can be signed using a
        #: :class:`~followthemoney.namespace.Namespace`.
        self.id = entity_id if cleaned else sanitize_text(entity_id)

        #: Any input fields other than ``id``, ``schema`` or ``properties``
        #: are kept here and re-added upon serialization.
        self.context = data
        self._properties: Dict[str, Set[str]] = {}
        self._size = 0

        for name, values in raw_properties.items():
            # Properties the schema does not define are ignored.
            if name not in self.schema.properties:
                continue
            if cleaned:
                unique = set(values)
                self._properties[name] = unique
                self._size += sum(map(len, unique))
            else:
                self.add(name, values, cleaned=cleaned, quiet=True)
示例#21
0
 def clean_text(
     self,
     text: str,
     fuzzy: bool = False,
     format: Optional[str] = None,
     proxy: Optional["EntityProxy"] = None,
 ) -> Optional[str]:
     """Type-specific cleaning hook: accept ``text`` only if it matches
     this type's ``REGEX``.

     :return: ``text`` unchanged on a match, otherwise ``None``.
     :raises InvalidData: if ``proxy`` is given and ``text`` equals its ID,
         i.e. the value would make the entity reference itself.
     """
     if proxy is not None and text == proxy.id:
         msg = gettext("Self-relationship (%s): %s")
         raise InvalidData(msg % (proxy.schema, text))
     matched = self.REGEX.match(text)
     return text if matched is not None else None
示例#22
0
    def add(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Add the given value(s) to the property if they are valid for
        the type of the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: set this when the values are already normalised;
            otherwise each one is passed through the type's ``clean`` first.
        :param quiet: a reference to a non-existent or stub property is
            silently ignored instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        :raises InvalidData: for a stub property unless ``quiet`` is set.
        """
        name = self._prop_name(prop, quiet=quiet)
        if name is None:
            return None
        target = self.schema.properties[name]

        # Stub (reverse) properties must not be written to.
        if target.stub:
            if not quiet:
                msg = gettext("Stub property (%s): %s")
                raise InvalidData(msg % (self.schema, target))
            return None

        for raw in value_list(values):
            value = raw
            if not cleaned:
                value = target.type.clean(
                    raw, proxy=self, fuzzy=fuzzy, format=format
                )
            # By this point the value is either trusted or freshly cleaned.
            self.unsafe_add(target, value, cleaned=True)
        return None
示例#23
0
 def _locale_names(self, locale: Locale) -> EnumValues:
     """Build the code-to-label mapping of territories for ``locale``.

     A fixed set of extra labels takes precedence. Entries from
     ``locale.territories`` are then added under their lower-cased code,
     unless that code is already present or parses as an integer.
     """

     def is_numeric(code):
         # ``int`` decides which codes are left out of the result.
         try:
             int(code)
         except ValueError:
             return False
         return True

     # Additional territories that OCCRP is interested in.
     names = {
         "zz": gettext("Global"),
         "eu": gettext("European Union"),
         "zr": gettext("Zaire"),
         # Replaces the "Czechia" label:
         "cz": gettext("Czech Republic"),
         "xk": gettext("Kosovo"),
         "dd": gettext("East Germany"),
         "yucs": gettext("Yugoslavia"),
         "csxx": gettext("Serbia and Montenegro"),
         "cshh": gettext("Czechoslovakia"),
         "suhh": gettext("Soviet Union"),
         "ge-ab": gettext("Abkhazia (Occupied Georgia)"),
         "x-so": gettext("South Ossetia (Occupied Georgia)"),
         "ua-lpr": gettext("Luhansk (Occupied Ukraine)"),
         "ua-dpr": gettext("Donetsk (Occupied Ukraine)"),
         "ua-cri": gettext("Crimea (Occupied Ukraine)"),
         "so-som": gettext("Somaliland"),
         "cy-trnc": gettext("Northern Cyprus"),
         "az-nk": gettext("Nagorno-Karabakh"),
         "cn-xz": gettext("Tibet"),
         "gg-srk": gettext("Sark"),
         "gb-wls": gettext("Wales"),
         "gb-sct": gettext("Scotland"),
         "gb-nir": gettext("Northern Ireland"),
         "md-pmr": gettext("Transnistria (PMR)"),
     }
     for territory, label in locale.territories.items():
         key = territory.lower()
         if key in names or is_numeric(key):
             continue
         names[key] = label
     return names
示例#24
0
 def description(self):
     """Return the stored description, passed through ``gettext``."""
     text = self._description
     return gettext(text)
示例#25
0
 def label(self):
     """Return the stored label, passed through ``gettext``."""
     text = self._label
     return gettext(text)
示例#26
0
 def edge_label(self):
     """Return the stored edge label, passed through ``gettext``."""
     text = self._edge_label
     return gettext(text)
示例#27
0
 def plural(self):
     """Return the stored plural name, passed through ``gettext``."""
     text = self._plural
     return gettext(text)
示例#28
0
 def plural(self):
     """Schema name for use in plural constructions, passed through
     ``gettext``."""
     text = self._plural
     return gettext(text)
示例#29
0
 def edge_label(self):
     """Label describing edges derived from entities of this schema,
     passed through ``gettext``."""
     text = self._edge_label
     return gettext(text)
示例#30
0
 def description(self):
     """Longer explanation of the semantics of the schema, passed through
     ``gettext``."""
     text = self._description
     return gettext(text)