Пример #1
0
 def __init__(self, model, data):
     """Populate the object from a raw ``data`` mapping.

     The canonical ID is read from ``canonical_id`` (or ``profile_id``
     when that key is absent) and, if still empty, derived from the
     embedded ``canonical`` object. The entity ID is resolved the same
     way from ``entity_id`` / ``entity``.
     """
     self.model = model
     self._data = data

     # Support output from Aleph's linkage API (profile_id):
     canonical_id = data.get('canonical_id', data.get('profile_id'))
     if not canonical_id:
         canonical_id = get_entity_id(data.get('canonical'))
     self.id = canonical_id
     self._canonical = None

     entity_id = data.get('entity_id')
     if not entity_id:
         entity_id = get_entity_id(data.get('entity'))
     self.entity_id = entity_id
     self._entity = None

     self.decision = data.get('decision')
     self._score = data.get('score')
Пример #2
0
 def __init__(self, model, data):
     """Populate the object from a raw ``data`` mapping.

     IDs given explicitly (``canonical_id``/``profile_id`` and
     ``entity_id``) win; otherwise they are derived from the nested
     ``canonical`` and ``entity`` objects via ``get_entity_id``.
     """
     self.model = model
     self._data = data

     # Support output from Aleph's linkage API (profile_id):
     canonical_id = data.get("canonical_id", data.get("profile_id"))
     if not canonical_id:
         canonical_id = get_entity_id(data.get("canonical"))
     self.id = canonical_id
     self._canonical = None

     entity_id = data.get("entity_id")
     if not entity_id:
         entity_id = get_entity_id(data.get("entity"))
     self.entity_id = entity_id
     self._entity = None

     self.decision = data.get("decision")
     self._score = data.get("score")
Пример #3
0
    def map(self, proxy, record, entities, **kwargs):
        """Apply this property mapping to ``proxy`` for one ``record``.

        If the mapping references another entity, its ID is added to the
        property (and its caption values are copied into ``indexText``
        when the proxy's schema has that property). Afterwards the record
        values are cleaned, optionally joined and/or split, and added to
        the property.
        """
        kwargs.update(self.data)

        linked = None if self.entity is None else entities.get(self.entity)
        if linked is not None:
            proxy.add(self.prop, get_entity_id(linked))

            # This is really bad in theory, but really useful
            # in practice. Shoot me.
            text = proxy.schema.get('indexText')
            if text is not None:
                for caption in linked.schema.caption:
                    proxy.add(text, linked.get(caption))

        # Clean the values returned by the query, or by using literals, or
        # formats; values that clean to None are dropped.
        cleaned = (
            self.type.clean(raw, **kwargs)
            for raw in self.record_values(record)
        )
        values = [value for value in cleaned if value is not None]

        if self.join is not None:
            values = [self.join.join(values)]

        if self.split is not None:
            values = [
                part
                for value in values
                for part in value.split(self.split)
            ]

        proxy.add(self.prop, values)
Пример #4
0
    def map(self, record, entities, **kwargs):
        """Compute the values of this property for one ``record``.

        When the mapping references another entity, the result is that
        entity's ID wrapped by ``ensure_list``. Otherwise the record
        values are cleaned, optionally joined and/or split, and returned
        through ``unique_list``.
        """
        kwargs.update(self.data)

        if self.entity is not None:
            linked = entities.get(self.entity)
            return ensure_list(get_entity_id(linked))

        # Clean the values returned by the query, or by using literals, or
        # formats; values that clean to None are dropped.
        cleaned = (
            self.type.clean(raw, **kwargs)
            for raw in self.record_values(record)
        )
        values = [value for value in cleaned if value is not None]

        if self.join is not None:
            values = [self.join.join(values)]

        if self.split is not None:
            values = [
                part
                for value in values
                for part in value.split(self.split)
            ]

        return unique_list(values)
Пример #5
0
    def map(self, proxy, record, entities):
        """Apply this property mapping to ``proxy`` for one ``record``.

        A referenced entity (if configured and present in ``entities``)
        is linked by ID and passed to ``inline_names``. The record values
        are then cleaned, optionally joined and/or split, and added to
        the property.
        """
        if self.entity is not None:
            linked = entities.get(self.entity)
            if linked is not None:
                proxy.add(self.prop, get_entity_id(linked))
                inline_names(proxy, linked)

        # Clean the values returned by the query, or by using literals, or
        # formats; values that clean to None are dropped.
        cleaned = (
            self.type.clean(raw, proxy=proxy, **self.data)
            for raw in self.record_values(record)
        )
        values = [value for value in cleaned if value is not None]

        if self.join is not None:
            values = [self.join.join(values)]

        if self.split is not None:
            values = [
                part
                for value in values
                for part in value.split(self.split)
            ]

        proxy.add(self.prop, values)
Пример #6
0
 def clean(self, text, **kwargs):
     """Return the entity ID of ``text`` as a string if it matches
     ``self.REGEX``; otherwise return ``None``."""
     entity_id = get_entity_id(text)
     if entity_id is None:
         return None
     candidate = str(entity_id)
     if self.REGEX.match(candidate) is None:
         return None
     return candidate
Пример #7
0
 def resolve(self, subject):
     """Given an entity or entity ID, return the canonicalised ID that
     should be used going forward.

     IDs without a registered cluster are returned as-is.
     """
     entity_id = get_entity_id(subject)
     cluster = self.clusters.get(entity_id)
     return entity_id if cluster is None else cluster.id
Пример #8
0
def channel(obj, clazz=None):
    """Build a channel name of the form ``<ClassName>:<id>`` for ``obj``.

    ``clazz`` defaults to the type of ``obj``. When it is ``str`` the
    object is returned unchanged. ``None`` is returned when no ID can be
    extracted from ``obj``.
    """
    clazz = clazz or type(obj)
    if clazz == str:
        return obj

    entity_id = get_entity_id(obj)
    if entity_id is None:
        return None
    return '%s:%s' % (clazz.__name__, entity_id)
Пример #9
0
def channel(obj, clazz=None):
    """Return the channel identifier for ``obj``.

    Objects whose class (explicit ``clazz`` or ``type(obj)``) is ``str``
    are passed through untouched; anything else becomes
    ``<ClassName>:<id>``, or ``None`` if ``get_entity_id`` yields no ID.
    """
    clazz = clazz or type(obj)
    if clazz == str:
        return obj

    ident = get_entity_id(obj)
    return None if ident is None else '%s:%s' % (clazz.__name__, ident)
Пример #10
0
    def add(self, subject, canonical):
        """Merge ``subject`` into the cluster of ``canonical``.

        Both arguments are reduced to entity IDs and passed through
        ``Namespace.parse``. Identical or missing IDs are ignored. Any
        cluster already registered for ``subject`` is folded into the
        target cluster, and every member is re-pointed at that cluster.
        """
        subject, _ = Namespace.parse(get_entity_id(subject))
        canonical, _ = Namespace.parse(get_entity_id(canonical))

        # Don't do no-ops, and ignore incomplete pairs.
        if subject == canonical or subject is None or canonical is None:
            return

        fresh = Cluster(canonical, subject)
        cluster = self.clusters.get(canonical, fresh)
        if subject in self.clusters:
            cluster.update(self.clusters.get(subject).entities)

        for member in cluster.entities:
            self.clusters[member] = cluster
Пример #11
0
 def clean(
     self,
     raw: Any,
     fuzzy: bool = False,
     format: Optional[str] = None,
     proxy: Optional["EntityProxy"] = None,
 ) -> Optional[str]:
     """Extract the entity ID from ``raw`` and pass it to ``clean_text``.

     Returns ``None`` when no ID can be extracted.
     """
     entity_id = get_entity_id(raw)
     if entity_id is not None:
         return self.clean_text(
             entity_id, fuzzy=fuzzy, format=format, proxy=proxy
         )
     return None
Пример #12
0
 def apply(self, proxy, shallow=False):
     """Rewrite an entity proxy so all IDs mentioned are limited to
     the namespace.

     Works on a clone of ``proxy``. With ``shallow=True`` only the
     proxy's own ID is signed; otherwise the values of every
     entity-typed property are re-signed as well.
     """
     signed = proxy.clone()
     signed.id = self.sign(proxy.id)
     if shallow:
         return signed

     for prop in proxy.iterprops():
         if prop.type != registry.entity:
             continue
         # Replace each referenced ID with its signed counterpart.
         for value in signed.pop(prop):
             signed.add(prop, self.sign(get_entity_id(value)))
     return signed
Пример #13
0
 def save(
     cls,
     session,
     subject,
     candidate,
     score=None,
     judgement=None,
     priority=None,
 ):
     """Create or update the record for a (subject, candidate) pair.

     An existing record is looked up with ``by_id``; if there is none, a
     new one is created with IDs derived from the two entities. ``score``
     and ``judgement`` are only written when given. The priority is the
     score multiplied by ``priority`` (``DEFAULT_PRIORITY`` when
     ``priority`` is falsy). The object is added to ``session`` and
     returned.
     """
     obj = cls.by_id(session, subject, candidate)
     if obj is None:
         obj = cls()
         obj.id = cls.make_id(subject, candidate)
         obj.subject, _ = Namespace.parse(get_entity_id(subject))
         obj.candidate, _ = Namespace.parse(get_entity_id(candidate))

     factor = priority or DEFAULT_PRIORITY
     if score is not None:
         obj.score = score
         obj.priority = score * factor
     if judgement is not None:
         obj.judgement = judgement

     obj.updated_at = now()
     session.add(obj)
     return obj
Пример #14
0
 def __init__(
     self,
     subject,
     prop,
     value,
     weight=1.0,
     inverted=False,
     inferred=False,
 ):
     """Store the given fields on the instance.

     ``value`` may be an entity or an ID; it is normalised with
     ``get_entity_id`` before being stored.
     """
     self.subject = subject
     self.prop = prop
     self.value = get_entity_id(value)
     self.weight = weight
     self.inverted = inverted
     self.inferred = inferred
Пример #15
0
def index_notification(event, actor_id, params, channels, sync=False):
    """Index a notification.

    Only the parameters declared on ``event`` are kept, each reduced to
    an entity ID. ``None`` channels are dropped and duplicates removed.
    The document ID is a hash of actor, event name, channels and params.
    """
    given = params or {}
    params = {
        name: get_entity_id(given.get(name))
        for name in event.params.keys()
    }
    channels = list({c for c in channels if c is not None})
    data = {
        'actor_id': actor_id,
        'params': params,
        'event': event.name,
        'channels': channels,
        'created_at': datetime.utcnow(),
    }
    index = notifications_index()
    id_ = hash_data((actor_id, event.name, channels, params))
    return index_safe(index, id_, data, refresh=refresh_sync(sync))
Пример #16
0
    def validate(self, data):
        """Validate that the data should be stored.

        Since the types system doesn't really have validation, this currently
        tries to normalize the value to see if it passes strict parsing.

        Returns a translated error message as soon as a value fails: either
        because the property is a stub, or because the type rejects the
        value. Entities in ``data`` are reduced to their IDs before being
        checked.
        """
        # NOTE(review): `values` is collected but neither used nor returned
        # in the code visible here -- the definition may continue beyond this
        # excerpt; confirm before relying on the return value.
        values = []
        for val in data:
            # The stub check sits inside the loop, so empty `data` never
            # triggers it.
            if self.stub:
                return gettext('Property cannot be written')
            val = get_entity_id(val)
            if not self.type.validate(val):
                return gettext('Invalid value')
            if val is not None:
                values.append(val)
Пример #17
0
def publish(event, actor_id=None, params=None, channels=None):
    """ Publish a notification to the given channels, while storing
    the parameters and initiating actor for the event.

    Only parameters declared on ``event`` are stored, each reduced to an
    entity ID. The database session is flushed afterwards.
    """
    assert isinstance(event, Event), event
    params = params or {}
    channels = [channel_tag(c) for c in ensure_list(channels)]
    outparams = {
        name: get_entity_id(params.get(name))
        for name, _ in event.params.items()
    }
    Notification.publish(
        event,
        actor_id=actor_id,
        params=outparams,
        channels=channels,
    )
    db.session.flush()
Пример #18
0
    def add(self, subject, canonical):
        """Record that ``subject`` should resolve to ``canonical``.

        Identical or missing IDs are ignored. The target is first
        resolved through existing linkages. If that would point back at
        ``subject``, the larger of the two IDs becomes the target.
        Everything that previously pointed at ``subject`` is re-pointed
        too.
        """
        subject = get_entity_id(subject)
        canonical = get_entity_id(canonical)

        # Don't do no-ops, and ignore incomplete pairs.
        if subject == canonical or subject is None or canonical is None:
            return
        resolved = self.resolve(canonical)

        # Circular dependencies: pick a stable direction for the link.
        if resolved == subject:
            resolved, subject = max(subject, canonical), min(subject, canonical)

        # Find existing references to the subject.
        subjects = [subject]
        subjects.extend(
            src for src, dst in self.linkages.items() if dst == subject
        )
        for sub in subjects:
            if sub != resolved:
                self.linkages[sub] = resolved
Пример #19
0
def _normalize_data(data):
    """Turn entities in properties into entity ids.

    Walks ``data['layout']['entities']`` and, for every entity-typed
    property, replaces the values with their entity IDs in place.
    Raises ``InvalidData`` for an unknown schema. Returns ``data``.
    """
    for obj in data['layout']['entities']:
        schema_name = obj.get('schema')
        schema = model.get(schema_name)
        if schema is None:
            raise InvalidData("Invalid schema %s" % obj.get('schema'))
        properties = obj.get('properties', {})
        # Iterate over a snapshot because the mapping is updated below.
        for name, values in list(properties.items()):
            prop = schema.get(name)
            if prop.type == registry.entity:
                properties[prop.name] = [
                    get_entity_id(value) for value in ensure_list(values)
                ]
    return data
Пример #20
0
    def apply(self, proxy):
        """Rewrite an entity proxy so all IDs mentioned are limited to
        the namespace.

        An exception is made for sameAs declarations: the signed ID of
        the proxy itself is removed from its ``sameAs`` property. The
        input proxy is not modified; a signed clone is returned.
        """
        signed = proxy.clone()
        signed.id = self.sign(proxy.id)
        for prop in proxy.iterprops():
            if prop.type != registry.entity:
                continue
            # Replace each referenced ID with its signed counterpart.
            for value in signed.pop(prop):
                signed.add(prop, self.sign(get_entity_id(value)))
        signed.remove('sameAs', signed.id)
        return signed
Пример #21
0
def publish(event, actor_id=None, params=None, channels=None):
    """ Publish a notification to the given channels, while storing
    the parameters and initiating actor for the event.

    Besides the channels passed in, the notification is also published
    to the channel of the acting role and to the channel of every
    parameter declared on ``event``. Parameters are stored as entity IDs.
    The database session is flushed afterwards.
    """
    assert isinstance(event, Event), event
    params = params or {}
    outparams = {}
    # Work on a fresh list: the channels are extended below, and
    # ensure_list() may hand back the very list the caller passed in
    # (presumably it returns list-like input unchanged). Copying keeps the
    # caller's argument untouched and also accepts tuples or sets.
    channels = list(ensure_list(channels))
    channels.append(channel(actor_id, clazz=Role))
    for name, clazz in event.params.items():
        obj = params.get(name)
        outparams[name] = get_entity_id(obj)
        channels.append(channel(obj, clazz=clazz))
    Notification.publish(event,
                         actor_id=actor_id,
                         params=outparams,
                         channels=channels)
    db.session.flush()
Пример #22
0
def index_notification(event, actor_id, params, channels, sync=False):
    """Index a notification.

    Parameters are reduced to entity IDs and stored as strings; those
    without an ID are omitted. ``None`` channels are dropped and
    duplicates removed. The document ID is a hash of actor, event name,
    channels and the parameters as passed in.
    """
    params = params or {}
    entity_ids = (
        (name, get_entity_id(value)) for name, value in params.items()
    )
    clean_params = {
        name: str(entity_id)
        for name, entity_id in entity_ids
        if entity_id is not None
    }
    channels = list({c for c in channels if c is not None})
    data = {
        "actor_id": actor_id,
        "params": clean_params,
        "event": event.name,
        "channels": channels,
        "created_at": datetime.utcnow(),
    }
    index = notifications_index()
    id_ = hash_data((actor_id, event.name, channels, params))
    return index_safe(index, id_, data, sync=sync)
Пример #23
0
 def resolve(self, subject):
     """Given an entity or entity ID, return the canonicalised ID that
     should be used going forward.

     IDs without a recorded linkage are returned unchanged.
     """
     entity_id = get_entity_id(subject)
     return self.linkages.get(entity_id, entity_id)
Пример #24
0
 def has(self, subject):
     """Return whether a linkage is recorded for the given entity or ID."""
     return get_entity_id(subject) in self.linkages
Пример #25
0
 def clean(self, text, **kwargs):
     """Return the entity ID of ``text`` if ``self.validate`` accepts it,
     otherwise ``None``."""
     entity_id = get_entity_id(text)
     if not self.validate(entity_id):
         return None
     return entity_id
Пример #26
0
 def __init__(self, subject, canonical, judgement):
     """Store the IDs of both entities and the judgement between them.

     A falsy ``judgement`` is replaced by ``self.UNSURE``.
     """
     self.subject = get_entity_id(subject)
     self.canonical = get_entity_id(canonical)
     self.judgement = judgement if judgement else self.UNSURE
Пример #27
0
 def entity(self, entity):
     """Store ``entity`` and keep ``entity_id`` in sync with it."""
     self._entity = entity
     # Derive the ID from the object just stored so both stay consistent.
     entity_id = get_entity_id(entity)
     self.entity_id = entity_id
Пример #28
0
 def canonical(self, entity):
     """Store the canonical ``entity`` and keep ``id`` in sync with it."""
     self._canonical = entity
     # Derive the ID from the object just stored so both stay consistent.
     canonical_id = get_entity_id(entity)
     self.id = canonical_id
Пример #29
0
 def clean(self, text, **kwargs):
     """Reduce ``text`` (an entity or an ID) to its entity ID.

     Extra keyword arguments are accepted and ignored.
     """
     entity_id = get_entity_id(text)
     return entity_id
Пример #30
0
 def make_id(cls, subject, candidate):
     """Build the combined ID ``<subject>.<candidate>`` from the parsed
     entity IDs of both arguments."""
     subject_id, _ = Namespace.parse(get_entity_id(subject))
     candidate_id, _ = Namespace.parse(get_entity_id(candidate))
     return '.'.join([subject_id, candidate_id])
Пример #31
0
 def __init__(self, type_, value):
     """Store the type, the entity ID of ``value``, and the RDF form the
     type produces for that ID."""
     self.type = type_
     # Accepts either an entity or a plain ID.
     self.value = get_entity_id(value)
     # The URI is derived from the normalised value, not the raw input.
     self.uri = self.type.rdf(self.value)
Пример #32
0
 def has(self, subject):
     """Return whether the given entity or ID belongs to a known cluster."""
     return get_entity_id(subject) in self.clusters