def to_dict(self):
    """Describe this object as a plain dictionary.

    ``label`` and ``plural`` are always present and are passed through
    ``gettext``. ``group`` is copied over only when it is truthy, and
    ``matchable`` is emitted (as ``True``) only when the flag is set.
    """
    summary = dict(label=gettext(self.label), plural=gettext(self.plural))
    group = self.group
    if group:
        summary['group'] = group
    if self.matchable:
        summary.update(matchable=True)
    return summary
def add(self, prop, values, cleaned=False, quiet=False, fuzzy=False):
    """Store one or more non-empty values on a property of this entity.

    ``prop`` is resolved via ``self._prop_name``; if that yields ``None``
    nothing is added. Unless ``cleaned`` is true, every value is first
    passed through the property type's ``clean`` and ``None`` results are
    skipped.

    Raises ``InvalidData`` when the property is a stub (unless ``quiet``),
    or when an entity-typed value equals this entity's own ``id``.
    """
    name = self._prop_name(prop, quiet=quiet)
    if name is None:
        return
    definition = self.schema.properties[name]

    # Stub (reverse) properties must not be written to.
    if definition.stub:
        if not quiet:
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, definition))
        return

    for raw in value_list(values):
        if cleaned:
            value = raw
        else:
            value = definition.type.clean(raw, proxy=self, fuzzy=fuzzy)
        if value is None:
            continue

        if definition.type == registry.entity and value == self.id:
            msg = gettext("Self-relationship (%s): %s")
            raise InvalidData(msg % (self.schema, definition))

        # Somewhat hacky: cap the accumulated size so that a single field
        # cannot overload upstream aleph/elasticsearch. Values that would
        # push the running total past the type's limit are skipped silently.
        size = len(value)
        limit = definition.type.max_size
        if limit is not None and self._size + size > limit:
            continue
        self._size += size
        self._properties.setdefault(name, set()).add(value)
def add(self, prop, values, cleaned=False, quiet=False):
    """Store one or more non-empty values on a property of this entity.

    ``prop`` is resolved via ``self._get_prop``; if that yields ``None``
    nothing is added. Unless ``cleaned`` is true, every value is first
    passed through the property type's ``clean`` (with this entity's
    ``countries``). ``None`` and non-hashable values are skipped.

    Raises ``InvalidData`` when the property is a stub (unless ``quiet``),
    or when an entity-typed value equals this entity's own ``id``.
    """
    resolved = self._get_prop(prop, quiet=quiet)
    if resolved is None:
        return

    # Stub (reverse) properties must not be written to.
    if resolved.stub:
        if not quiet:
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, resolved))
        return

    for raw in ensure_list(values):
        if cleaned:
            value = raw
        else:
            value = resolved.type.clean(raw, countries=self.countries)
        if value is None:
            continue
        if not isinstance(value, Hashable):
            continue

        if resolved.type == registry.entity and value == self.id:
            msg = gettext("Self-relationship (%s): %s")
            raise InvalidData(msg % (self.schema, resolved))

        # Somewhat hacky: cap the accumulated size so that a single field
        # cannot overload upstream aleph/elasticsearch.
        size = resolved.type.values_size(value)
        limit = resolved.type.max_size
        if limit is not None and self._size + size > limit:
            msg = "[%s] too large. Rejecting additional values."
            log.warning(msg, resolved.name)
            continue
        self._size += size
        self._properties.setdefault(resolved, set()).add(value)
def to_dict(self):
    """Describe this object as a plain dictionary.

    ``label`` and ``plural`` are always present and are passed through
    ``gettext``. ``group`` is included only when truthy; ``matchable`` and
    ``pivot`` are included (as ``True``) only when the respective flag is
    set.
    """
    described = {}
    described["label"] = gettext(self.label)
    described["plural"] = gettext(self.plural)
    if self.group:
        described["group"] = self.group
    # Boolean markers are emitted only when set, never as ``False``.
    for flag in ("matchable", "pivot"):
        if getattr(self, flag):
            described[flag] = True
    return described
def to_dict(self) -> Dict[str, Any]:
    """Build a serialisable description of this data type.

    ``label`` and ``plural`` are always present (passed through
    ``gettext``); ``group`` is added when truthy, and ``matchable`` /
    ``pivot`` are added as ``True`` only when set.
    """
    result: Dict[str, Any] = {
        "label": gettext(self.label),
        "plural": gettext(self.plural),
    }
    group = self.group
    if group:
        result["group"] = group
    result.update(
        {name: True for name in ("matchable", "pivot") if getattr(self, name)}
    )
    return result
def to_dict(self) -> PropertyTypeToDict:
    """Build a serialisable description of this data type.

    ``label`` and ``plural`` are always present (passed through
    ``gettext``); ``group`` is added when truthy, and ``matchable`` /
    ``pivot`` are added as ``True`` only when set.
    """
    payload: PropertyTypeToDict = {
        "label": gettext(self.label),
        "plural": gettext(self.plural),
    }
    group = self.group
    if group:
        payload["group"] = group
    # Flags are only ever serialised as ``True``; unset flags are omitted.
    if self.matchable:
        payload["matchable"] = True
    if self.pivot:
        payload["pivot"] = True
    return payload
def validate(self, data):
    """Validate that the data should be stored.

    Since the types system doesn't really have validation, this currently
    tries to normalize each value to see if it passes strict parsing.

    :param data: an iterable of values; each one is passed through
        ``get_entity_id`` before being checked by the property type.
    :return: an error message for the first problem found, or ``None``
        when every value is acceptable.
    """
    for val in data:
        # Checked per item rather than up front, so an empty ``data`` is
        # accepted even for stub properties.
        if self.stub:
            return gettext('Property cannot be written')
        if not self.type.validate(get_entity_id(val)):
            return gettext('Invalid value')
    return None
def __init__(self, model, data, key_prefix=None, cleaned=True):
    """Set up the proxy from a dictionary with ``schema``, ``id`` and
    ``properties`` keys.

    The schema is looked up on ``model``; ``InvalidData`` is raised when
    it cannot be found. With ``cleaned`` true the property values are
    stored as given; otherwise the id is sanitised and every property is
    routed through :meth:`add`. Remaining keys of ``data`` are kept as
    ``self.context``. Properties unknown to the schema are ignored.
    """
    data = dict(data)
    properties = data.pop("properties", {})
    if not cleaned:
        properties = ensure_dict(properties)

    self.schema = model.get(data.pop("schema", None))
    if self.schema is None:
        raise InvalidData(gettext("No schema for entity."))

    self.key_prefix = key_prefix
    entity_id = data.pop("id", None)
    self.id = entity_id if cleaned else sanitize_text(entity_id)
    self.context = data
    self._properties = {}
    self._size = 0

    for key, value in properties.items():
        if key not in self.schema.properties:
            continue
        if cleaned:
            # Trusted input: store directly and account for its size.
            unique = set(value)
            self._properties[key] = unique
            self._size += sum(len(item) for item in unique)
        else:
            self.add(key, value, cleaned=False, quiet=True)
def validate(self, data):
    """Check whether the given value(s) may be stored on this property.

    Since the types system doesn't really have validation, each value is
    handed to the property type's ``validate``. Mappings are reduced to
    their ``id`` entry first. Returns an error message for the first
    invalid value, ``Required`` when the property is required but no
    non-``None`` value was given, and ``None`` otherwise.
    """
    accepted = []
    for item in ensure_list(data):
        candidate = item.get('id') if is_mapping(item) else item
        if not self.type.validate(candidate):
            return gettext('Invalid value')
        if candidate is not None:
            accepted.append(candidate)
    if self.required and len(accepted) == 0:
        return gettext('Required')
    return None
def from_dict(cls, model, data):
    """Build an instance of ``cls`` from a dictionary.

    If ``data`` already is an instance of ``cls`` it is returned as-is.
    Otherwise the schema named by ``data['schema']`` is looked up on
    ``model`` (``InvalidData`` is raised if it is missing) and ``cls`` is
    called with the schema, the ``id`` and the ``properties`` entries.
    """
    if not isinstance(data, cls):
        schema = model.get(data.get('schema'))
        if schema is None:
            raise InvalidData(gettext('No schema for entity.'))
        entity_id = data.get('id')
        properties = data.get('properties')
        return cls(schema, entity_id, properties)
    return data
def validate(self, data):
    """Validate a dataset against this schema.

    Each property of the schema validates the values found under
    ``data['properties']``. A property listed in ``self.required`` that
    has no values and no other error is reported as ``Required``. If any
    property reports an error, ``InvalidData`` is raised carrying the
    per-property messages.
    """
    supplied = ensure_dict(data.get("properties"))
    failures = {}
    for name, prop in self.properties.items():
        values = ensure_list(supplied.get(name))
        problem = prop.validate(values)
        if problem is None and len(values) == 0 and prop.name in self.required:
            problem = gettext("Required")
        if problem is not None:
            failures[name] = problem
    if failures:
        raise InvalidData(
            gettext("Entity validation failed"),
            errors={"properties": failures},
        )
def _get_prop(self, prop, quiet=False):
    """Resolve ``prop`` to a ``Property`` of this entity's schema.

    ``Property`` instances are returned unchanged; anything else is
    treated as a name and looked up on the schema. Unknown names yield
    ``None`` when ``quiet`` is set and raise ``InvalidData`` otherwise.
    """
    if isinstance(prop, Property):
        return prop
    if prop in self.schema.properties:
        return self.schema.get(prop)
    if quiet:
        return None
    template = gettext("Unknown property (%s): %s")
    raise InvalidData(template % (self.schema, prop))
def validate(self, data: Any) -> Optional[str]:
    """Validate a dictionary against this schema.

    Each property of the schema validates the values found under
    ``data['properties']``. A property listed in ``self.required`` that
    has no values and no other error is reported as ``Required``. If any
    property reports an error, ``InvalidData`` is raised carrying the
    per-property messages; otherwise ``None`` is returned.
    """
    supplied = cast(Dict[str, Any], ensure_dict(data.get("properties")))
    failures = {}
    for name, prop in self.properties.items():
        values = ensure_list(supplied.get(name, []))
        problem = prop.validate(values)
        if problem is None and len(values) == 0 and prop.name in self.required:
            problem = gettext("Required")
        if problem is not None:
            failures[name] = problem
    if not failures:
        return None
    raise InvalidData(
        gettext("Entity validation failed"),
        errors={"properties": failures},
    )
def add(self, prop, values, cleaned=False, quiet=False, fuzzy=False):
    """Add the given value(s) to a property, provided they are valid for
    the property's type.

    :param prop: a property name or an instance of
        :class:`~followthemoney.property.Property`.
    :param values: a single value, or a list of values, to be added.
    :param cleaned: when true the values are stored as given; otherwise
        each one is first normalised by the property type's ``clean``.
    :param quiet: when true, an unknown or stub property makes the call
        return without adding anything instead of raising an error.
    :param fuzzy: passed on to ``clean`` to allow fuzzy matching while
        normalising.
    :raises InvalidData: for a stub property (unless ``quiet``) or when an
        entity-typed value equals this entity's own ``id``.
    """
    resolved_name = self._prop_name(prop, quiet=quiet)
    if resolved_name is None:
        return
    target = self.schema.properties[resolved_name]

    # Stub (reverse) properties must not be written to.
    if target.stub:
        if quiet:
            return
        raise InvalidData(
            gettext("Stub property (%s): %s") % (self.schema, target)
        )

    for candidate in value_list(values):
        if not cleaned:
            candidate = target.type.clean(candidate, proxy=self, fuzzy=fuzzy)
        if candidate is None:
            continue
        is_self_reference = (
            target.type == registry.entity and candidate == self.id
        )
        if is_self_reference:
            raise InvalidData(
                gettext("Self-relationship (%s): %s") % (self.schema, target)
            )

        # Somewhat hacky: cap the accumulated size so that a single field
        # cannot overload upstream aleph/elasticsearch. Values that would
        # push the running total past the type's limit are skipped silently.
        added_size = len(candidate)
        if target.type.max_size is not None:
            projected = self._size + added_size
            if projected > target.type.max_size:
                continue
        self._size += added_size
        self._properties.setdefault(resolved_name, set()).add(candidate)
def validate(self, data):
    """Check whether the given value(s) may be stored on this property.

    Since the types system doesn't really have validation, each value is
    handed to the property type's ``validate``; dictionaries are reduced
    to their ``id`` entry first.

    Returns a ``(values, error)`` pair. On success ``values`` holds the
    de-duplicated accepted values and ``error`` is ``None``. On failure
    ``values`` is the input as a list and ``error`` is the message
    (``Required`` takes precedence over ``Invalid value``).
    """
    accepted = []
    problem = None
    for item in ensure_list(data):
        candidate = item.get('id') if isinstance(item, dict) else item
        if self.type.validate(candidate):
            accepted.append(candidate)
        else:
            problem = gettext('Invalid value')

    if self.required and len(accepted) == 0:
        problem = gettext('Required')

    if problem is None:
        return list(set(accepted)), None
    return ensure_list(data), problem
def _prop_name(self, prop, quiet=False):
    """Resolve ``prop`` (a name or an object with a ``name``) to the name
    of a property on this entity's schema.

    Unknown properties yield ``None`` when ``quiet`` is set and raise
    ``InvalidData`` otherwise.
    """
    # Deliberately spelled out step by step: this gets called a *lot*.
    if prop in self.schema.properties:
        return prop
    try:
        name = prop.name
    except AttributeError:
        # Not a property object; fall through to the "unknown" handling.
        pass
    else:
        if name in self.schema.properties:
            return name
    if not quiet:
        template = gettext("Unknown property (%s): %s")
        raise InvalidData(template % (self.schema, prop))
    return None
def validate(self, data):
    """Validate a dataset against this schema.

    Each property of the schema validates whatever is stored under its
    name in ``data['properties']`` (``None`` when absent). If any property
    reports an error, ``InvalidData`` is raised carrying the per-property
    messages.
    """
    supplied = ensure_dict(data.get('properties'))
    outcomes = (
        (name, prop.validate(supplied.get(name)))
        for name, prop in self.properties.items()
    )
    failures = {
        name: problem for name, problem in outcomes if problem is not None
    }
    if failures:
        raise InvalidData(
            gettext("Entity failed validation"),
            errors={'properties': failures},
        )
def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
    """Resolve ``prop`` (a name or a ``Property``) to the name of a
    property on this entity's schema.

    Unknown properties yield ``None`` when ``quiet`` is set and raise
    ``InvalidData`` otherwise.
    """
    # Deliberately spelled out step by step: this gets called a *lot*.
    if prop in self.schema.properties:
        return cast(str, prop)
    try:
        name = cast(Property, prop).name
    except AttributeError:
        # Not a property object; fall through to the "unknown" handling.
        pass
    else:
        if name in self.schema.properties:
            return name
    if not quiet:
        template = gettext("Unknown property (%s): %s")
        raise InvalidData(template % (self.schema, prop))
    return None
def __init__(self, model, data, key_prefix=None):
    """Set up the proxy from a dictionary with ``schema``, ``id`` and
    ``properties`` keys.

    The schema is looked up on ``model``; ``InvalidData`` is raised when
    it cannot be found. The id and ``key_prefix`` are passed through
    ``sanitize_text``, remaining keys of ``data`` are kept as
    ``self.context``, and each property is added via :meth:`add` with
    ``cleaned=True`` and ``quiet=True``.
    """
    data = dict(data)
    properties = ensure_dict(data.pop('properties', {}))

    self.schema = model.get(data.pop('schema', None))
    if self.schema is None:
        raise InvalidData(gettext('No schema for entity.'))

    self.id = sanitize_text(data.pop('id', None))
    self.key_prefix = sanitize_text(key_prefix)
    self.context = data
    self._properties = {}
    self._size = 0

    if not is_mapping(properties):
        return
    for key, value in properties.items():
        self.add(key, value, cleaned=True, quiet=True)
def __init__(
    self,
    model: "Model",
    data: Dict[str, Any],
    key_prefix: Optional[str] = None,
    cleaned: bool = True,
):
    """Set up the proxy from a dictionary with ``schema``, ``id`` and
    ``properties`` keys.

    The schema is looked up on ``model``; ``InvalidData`` is raised when
    it cannot be found. With ``cleaned`` true the property values are
    stored as given; otherwise the id is sanitised and every property is
    routed through :meth:`add`. Properties unknown to the schema are
    ignored.
    """
    data = dict(data or {})
    properties = data.pop("properties", {})
    if not cleaned:
        properties = ensure_dict(properties)

    schema = model.get(data.pop("schema", None))
    if schema is None:
        raise InvalidData(gettext("No schema for entity."))

    #: The schema definition for this entity, which implies the
    #: properties that can be set on it.
    self.schema = schema

    #: When using :meth:`~make_id` to generate a natural key for this
    #: entity, the prefix will be added to the ID as a salt to make it
    #: easier to keep IDs unique across datasets. This is somewhat
    #: redundant following the introduction of
    #: :class:`~followthemoney.namespace.Namespace`.
    self.key_prefix = key_prefix

    entity_id = data.pop("id", None)
    #: A unique identifier for this entity, usually a hashed natural key,
    #: a UUID, or a very simple slug. Can be signed using a
    #: :class:`~followthemoney.namespace.Namespace`.
    self.id = entity_id if cleaned else sanitize_text(entity_id)

    #: If the input dictionary for the entity proxy contains fields other
    #: than ``id``, ``schema`` or ``properties``, they will be kept in
    #: here and re-added upon serialization.
    self.context = data

    self._properties: Dict[str, Set[str]] = {}
    self._size = 0

    for key, value in properties.items():
        if key not in self.schema.properties:
            continue
        if cleaned:
            # Trusted input: store directly and account for its size.
            unique = set(value)
            self._properties[key] = unique
            self._size += sum(len(item) for item in unique)
        else:
            self.add(key, value, cleaned=False, quiet=True)
def clean_text(
    self,
    text: str,
    fuzzy: bool = False,
    format: Optional[str] = None,
    proxy: Optional["EntityProxy"] = None,
) -> Optional[str]:
    """Type-specific cleaning hook (called by ``clean`` once the value has
    been converted to a string and null values have been filtered).

    Returns ``text`` unchanged when ``self.REGEX`` matches it and ``None``
    otherwise. Raises ``InvalidData`` when a ``proxy`` is given and
    ``text`` equals that proxy's ``id``. ``fuzzy`` and ``format`` are
    accepted but not used here.
    """
    points_at_itself = proxy is not None and text == proxy.id
    if points_at_itself:
        template = gettext("Self-relationship (%s): %s")
        raise InvalidData(template % (proxy.schema, text))
    matched = self.REGEX.match(text)
    return text if matched is not None else None
def add(
    self,
    prop: P,
    values: Any,
    cleaned: bool = False,
    quiet: bool = False,
    fuzzy: bool = False,
    format: Optional[str] = None,
) -> None:
    """Add the given value(s) to a property, provided they are valid for
    the property's type.

    :param prop: a property name or an instance of
        :class:`~followthemoney.property.Property`.
    :param values: a single value, or a list of values, to be added.
    :param cleaned: when true the values are handed on as given; otherwise
        each one is first normalised by the property type's ``clean``.
    :param quiet: when true, an unknown or stub property makes the call
        return without adding anything instead of raising an error.
    :param fuzzy: passed on to ``clean`` to allow fuzzy matching while
        normalising.
    :param format: passed on to ``clean``; formatting for a date.
    :raises InvalidData: for a stub property when ``quiet`` is false.
    """
    resolved_name = self._prop_name(prop, quiet=quiet)
    if resolved_name is None:
        return None
    target = self.schema.properties[resolved_name]

    # Stub (reverse) properties must not be written to.
    if target.stub:
        if not quiet:
            template = gettext("Stub property (%s): %s")
            raise InvalidData(template % (self.schema, target))
        return None

    for item in value_list(values):
        if cleaned:
            normalised = item
        else:
            normalised = target.type.clean(
                item, proxy=self, fuzzy=fuzzy, format=format
            )
        # Whatever comes out of the step above is handed on as clean.
        self.unsafe_add(target, normalised, cleaned=True)
    return None
def _locale_names(self, locale: Locale) -> EnumValues:
    """Map lower-cased territory codes to display names.

    A fixed set of extra or overridden territories comes first; every
    territory of ``locale`` is then added under its lower-cased code,
    unless that code is already present or parses as an integer.
    """
    # Extra territories that OCCRP is interested in.
    names = {
        "zz": gettext("Global"),
        "eu": gettext("European Union"),
        "zr": gettext("Zaire"),
        # Overwrite "Czechia" label:
        "cz": gettext("Czech Republic"),
        "xk": gettext("Kosovo"),
        "dd": gettext("East Germany"),
        "yucs": gettext("Yugoslavia"),
        "csxx": gettext("Serbia and Montenegro"),
        "cshh": gettext("Czechoslovakia"),
        "suhh": gettext("Soviet Union"),
        "ge-ab": gettext("Abkhazia (Occupied Georgia)"),
        "x-so": gettext("South Ossetia (Occupied Georgia)"),
        "ua-lpr": gettext("Luhansk (Occupied Ukraine)"),
        "ua-dpr": gettext("Donetsk (Occupied Ukraine)"),
        "ua-cri": gettext("Crimea (Occupied Ukraine)"),
        "so-som": gettext("Somaliland"),
        "cy-trnc": gettext("Northern Cyprus"),
        "az-nk": gettext("Nagorno-Karabakh"),
        "cn-xz": gettext("Tibet"),
        "gg-srk": gettext("Sark"),
        "gb-wls": gettext("Wales"),
        "gb-sct": gettext("Scotland"),
        "gb-nir": gettext("Northern Ireland"),
        "md-pmr": gettext("Transnistria (PMR)"),
    }
    for territory, label in locale.territories.items():
        key = territory.lower()
        if key not in names:
            try:
                int(key)
            except ValueError:
                # Only codes that do not parse as an integer are kept.
                names[key] = label
    return names
def description(self):
    """Return ``self._description`` passed through ``gettext``."""
    text = self._description
    return gettext(text)
def label(self):
    """Return ``self._label`` passed through ``gettext``."""
    text = self._label
    return gettext(text)
def edge_label(self):
    """Return ``self._edge_label`` passed through ``gettext``."""
    text = self._edge_label
    return gettext(text)
def plural(self):
    """Return ``self._plural`` passed through ``gettext``."""
    text = self._plural
    return gettext(text)
def plural(self):
    """Schema name for use in plural constructions, passed through
    ``gettext``."""
    plural_form = self._plural
    return gettext(plural_form)
def edge_label(self):
    """Label describing edges derived from entities of this schema,
    passed through ``gettext``."""
    caption = self._edge_label
    return gettext(caption)
def description(self):
    """Longer explanation of the semantics of the schema, passed through
    ``gettext``."""
    explanation = self._description
    return gettext(explanation)