示例#1
0
def convert_party(party):
    """Convert a party record into entities, yielded one at a time.

    The ``LegalEntity`` built from ``party`` is yielded first. For each
    ``memberOf`` entry the parent is converted recursively, re-typed as an
    ``Organization`` and yielded, followed by a ``Membership`` that links
    ``party`` to it. Whatever else the recursive conversion produces (the
    parent's own parents and their memberships) is passed through unchanged.

    Keys that are consumed are popped, so ``party`` is mutated.
    """
    entity = model.make_entity('LegalEntity')
    party_id = party.pop('id', None)
    identifier = party.pop('identifier', {})
    # Fall back to the primary identifier when the party carries no own ID.
    if party_id is None:
        party_id = identifier.get('id')
    entity.make_id(party_id)
    convert_name(entity, party)
    convert_address(entity, party.pop('address', {}))
    convert_address(entity, party.pop('deliveryAddress', {}))
    entity.add('legalForm', party.pop('organizationType', None))
    contact = party.pop('contactPoint', {})
    entity.add('website', contact.pop('url', None))
    entity.add('phone', contact.pop('telephone', None))
    entity.add('email', contact.pop('email', None))
    convert_identifier(entity, identifier)
    for extra_identifier in party.pop('additionalIdentifiers', []):
        convert_identifier(entity, extra_identifier)
    yield entity

    for parent in ensure_list(party.pop('memberOf', [])):
        converted = convert_party(parent)
        # Only the first entity of the recursive conversion is the parent
        # itself. Re-typing and linking every yielded item would also hit
        # the Membership entities produced for the parent's own parents.
        organization = next(converted, None)
        if organization is None:
            continue
        organization.schema = model.get('Organization')
        yield organization
        membership = model.make_entity('Membership')
        membership.make_id(entity.id, organization.id)
        membership.add('member', entity)
        membership.add('organization', organization)
        yield membership
        for ancestor in converted:
            yield ancestor

    party.pop('roles', None)
def make_relation(root_path, documents, api, cid, entity):
    """Yield the relationship proxy described by a raw ``entity`` record.

    Nothing is yielded when the record's ``relation`` has no entry in
    ``RELATIONS``. The keys ``relation``, ``edge``, ``subject`` and
    ``object`` are popped from ``entity``. Every remaining key is treated
    as a section: values under ``Source`` are passed to ``upload_document``,
    all others are added to the proxy under the property that ``PROPERTIES``
    maps the section to.
    """
    relation = entity.pop('relation')
    if relation not in RELATIONS:
        return
    schema, subject_prop, object_prop, relation_prop = RELATIONS.get(relation)

    link = model.make_entity(schema)
    link.make_id(entity.pop('edge'))
    if relation_prop is not None:
        link.add(relation_prop, relation)

    # Both ends are referenced through throw-away ``Thing`` entities: only
    # the IDs derived from the raw keys end up on the relationship.
    for end_prop, end_key in ((subject_prop, 'subject'),
                              (object_prop, 'object')):
        endpoint = model.make_entity('Thing')
        endpoint.make_id(entity.pop(end_key))
        link.add(end_prop, endpoint.id)

    for section, values in entity.items():
        is_source = section == 'Source'
        for value in values:
            if is_source:
                # TODO: no 'proof' on Intervals.
                upload_document(root_path, documents, api, cid, value)
                continue
            target_prop = PROPERTIES.get(section)
            # Associates store their role under a differently named property.
            if schema == 'Associate' and target_prop == 'role':
                target_prop = 'relationship'
            link.add(target_prop, value)

    print(repr(link))
    yield link
示例#3
0
 def test_make_id(self):
     """Check make_id for None, repeated input, and a key prefix."""
     plain = model.make_entity("Thing")
     assert not plain.make_id(None)
     assert plain.make_id("banana")
     # The same input must hash to the same ID on every call.
     first = plain.make_id("banana")
     second = plain.make_id("banana")
     assert first == second
     unprefixed = plain.make_id(44)
     assert unprefixed is not None
     # A key prefix must change the ID generated for the same input.
     prefixed = model.make_entity("Thing", key_prefix="foo")
     assert prefixed.make_id(44)
     assert prefixed.make_id(44) != unprefixed, prefixed.make_id(44)
示例#4
0
 def test_make_id(self):
     """Check make_id for None, repeated input, and a key prefix."""
     bare = model.make_entity('Thing')
     assert not bare.make_id(None)
     assert bare.make_id('banana')
     # Hashing the same value twice has to give the same ID.
     once = bare.make_id('banana')
     again = bare.make_id('banana')
     assert once == again
     without_prefix = bare.make_id(44)
     assert without_prefix is not None
     # With a key prefix the ID for the same value has to differ.
     with_prefix = model.make_entity('Thing', key_prefix='foo')
     assert with_prefix.make_id(44)
     assert with_prefix.make_id(44) != without_prefix, with_prefix.make_id(44)
示例#5
0
def write_edges(writer, db):
    """Convert every record in ``db["edges"]`` and store the result.

    Edges whose type is in ``IGNORE_EDGE_TYPES`` are dropped. Edges whose
    type is in ``SAME_AS`` are written as mutual ``sameAs`` fragments on the
    two node entities. All other edges become an entity of the schema chosen
    by ``edge_schema``. A progress line is printed when the 1-based index of
    a fully processed edge is a multiple of 10000.
    """
    for count, edge in enumerate(db["edges"], 1):
        # Each field exists under two key spellings; the second spelling
        # wins when both are present.
        source_id = edge.pop("node_1", edge.pop("start_id", None))
        source = make_node_entity(source_id)
        target_id = edge.pop("node_2", edge.pop("end_id", None))
        target = make_node_entity(target_id)
        type_ = edge.pop("type", edge.pop("rel_type", None))
        link = edge.pop("link", None)

        if type_ in IGNORE_EDGE_TYPES:
            continue

        if type_ in SAME_AS:
            for left, right in ((source, target), (target, source)):
                left.add("sameAs", right)
                writer.put(left, fragment=right.id)
            continue

        relation = model.make_entity(edge_schema(type_, link))
        relation.make_id(source_id, target_id, type_, link)
        for prop, key in (("startDate", "start_date"),
                          ("endDate", "end_date")):
            relation.add(prop, parse_date(edge.pop(key, None)))
        relation.add("summary", edge.pop("valid_until", None))
        relation.add(relation.schema.source_prop, source)
        relation.add(relation.schema.target_prop, target)
        # The link text is the role; the edge type is added when it is absent.
        relation.add("role", link)
        if link is None:
            relation.add("role", type_)
        writer.put(relation)

        if count % 10000 == 0:
            print("edges: %s" % count)
示例#6
0
 def __init__(self, dataset, entity, context):
     """Set up per-entity state for ``dataset``.

     The namespace is built from ``context["namespace"]``, defaulting to
     the dataset name. ``self.entity`` is a newly made entity carrying only
     the schema and ID of ``entity``.
     """
     self.dataset = dataset
     namespace_key = context.get("namespace", dataset.name)
     self.ns = Namespace(namespace_key)
     # Start from a fresh entity: same schema and ID, nothing else copied.
     blank = model.make_entity(entity.schema)
     blank.id = entity.id
     self.entity = blank
     self.aggregator_entities = TagAggregatorFasttext()
     self.aggregator_patterns = TagAggregator()
示例#7
0
 def test_de_number(self):
     """A national number is only cleaned when a proxy supplies the country."""
     phone_type = registry.phone
     german = model.make_entity("Person")
     german.add("country", "DE")
     national = "017623423980"
     self.assertEqual(phone_type.clean(national), None)
     cleaned = phone_type.clean(national, proxy=german)
     self.assertEqual(cleaned, "+4917623423980")
示例#8
0
    def test_rdf(self):
        """The fixture entity gives ten triples; an empty Person gives none."""
        fixture = EntityProxy.from_dict(model, ENTITY)
        fixture_triples = list(fixture.triples())
        assert 10 == len(fixture_triples), len(fixture_triples)

        empty = model.make_entity("Person")
        empty_triples = list(empty.triples())
        assert 0 == len(empty_triples)
示例#9
0
 def company_entity(self, data, entity=None):
     """Populate a ``Company`` entity from an OpenCorporates-style record.

     Args:
         data: company record, either bare or wrapped under a ``"company"``
             key.
         entity: entity to add the properties to. When ``None``, a new
             ``Company`` is made with an ID derived from
             ``opencorporates_url``.

     Returns:
         The populated entity.
     """
     if "company" in data:
         data = ensure_dict(data.get("company", data))
     if entity is None:
         entity = model.make_entity("Company")
         entity.make_id(data.get("opencorporates_url"))

     entity.add("name", data.get("name"))
     address = ensure_dict(data.get("registered_address"))
     entity.add("country", address.get("country"))
     # Aliases are added further down from each entry's ``company_name``;
     # the raw ``alternative_names`` entries are dicts and must not be
     # handed to the name property as they are.
     for prop, key in (
         ("jurisdiction", "jurisdiction_code"),
         ("address", "registered_address_in_full"),
         ("sourceUrl", "registry_url"),
         ("legalForm", "company_type"),
         ("incorporationDate", "incorporation_date"),
         ("dissolutionDate", "dissolution_date"),
         ("status", "current_status"),
         ("registrationNumber", "company_number"),
         ("opencorporatesUrl", "opencorporates_url"),
     ):
         entity.add(prop, data.get(key))

     # Normalised like ``registered_address`` above, so an explicit null
     # ``source`` does not break the lookups below.
     source = ensure_dict(data.get("source"))
     entity.add("publisher", source.get("publisher"))
     entity.add("publisherUrl", source.get("url"))
     entity.add("retrievedAt", source.get("retrieved_at"))

     for code in ensure_list(data.get("industry_codes")):
         # Entries may be wrapped under an ``industry_code`` key.
         code = code.get("industry_code", code)
         entity.add("sector", code.get("description"))
     for previous in ensure_list(data.get("previous_names")):
         entity.add("previousName", previous.get("company_name"))
     for alias in ensure_list(data.get("alternative_names")):
         entity.add("alias", alias.get("company_name"))
     return entity
示例#10
0
def convert_party(party):
    """Build a ``LegalEntity`` from a party record and return it.

    Keys that are used are popped from ``party`` and from its nested
    address, contact point and identifier dicts, so the input is mutated.
    Address keys left over afterwards, and identifier schemes missing from
    ``IDENTIFIERS``, are logged and otherwise ignored.
    """
    entity = model.make_entity('LegalEntity')
    entity.make_id(party.pop('id', None))
    entity.add('name', party.pop('name', None))

    address = party.pop('address', {})
    entity.add('country', address.pop('countryName', None))
    street = address.pop('streetAddress', None)
    postal_code = address.pop('postalCode', None)
    region = address.pop('region', None)
    entity.add('address', make_address(street, postal_code, region))
    if address:
        log.info("Unknown address part: %r", address.keys())

    contact = party.pop('contactPoint', {})
    for prop, key in (('website', 'url'),
                      ('phone', 'telephone'),
                      ('email', 'email')):
        entity.add(prop, contact.pop(key, None))

    for identifier in party.pop('additionalIdentifiers', []):
        scheme = identifier.pop('scheme', None)
        prop = IDENTIFIERS.get(scheme, None)
        if prop is not None:
            entity.add(prop, identifier.pop('id', None))
        else:
            log.info("Unknown identifier scheme: %s", scheme)
    return entity
    def parse_calls(self, call):
        """Yield the entities for one call record: each party, then the call.

        ``self.owner`` is recorded as the caller when ``OUTGOING`` is among
        the call's types and as the receiver otherwise. Every parsed party
        is recorded on the opposite side.
        """
        record = model.make_entity('Call')
        record.make_id(self.project_id, call.get('id'))

        for stamp in self._field_values(call, 'TimeStamp'):
            record.add('date', self.parse_timestamp(stamp))

        for length in self._field_values(call, 'Duration'):
            record.add('duration', self.get_seconds(length))

        call_types = self._field_values(call, 'Type')
        if OUTGOING in call_types:
            owner_props = ('caller', 'callerNumber')
        else:
            owner_props = ('receiver', 'receiverNumber')
        record.add(owner_props[0], self.owner)
        record.add(owner_props[1], self.owner.get('phone'))

        for party in self.parse_parties(self._models(call, 'Party')):
            # The direction is checked again for every party rather than
            # cached, since the type returned by _field_values is not
            # visible from here.
            if OUTGOING in call_types:
                party_props = ('receiver', 'receiverNumber')
            else:
                party_props = ('caller', 'callerNumber')
            record.add(party_props[0], party)
            record.add(party_props[1], party.get('phone'))
            yield party

        yield record
 def company_entity(self, data, entity=None):
     """Populate a ``Company`` entity from an OpenCorporates-style record.

     Args:
         data: company record, either bare or wrapped under a ``'company'``
             key.
         entity: entity to add the properties to. When ``None``, a new
             ``Company`` is made with an ID derived from
             ``opencorporates_url``.

     Returns:
         The populated entity.
     """
     if 'company' in data:
         data = ensure_dict(data.get('company', data))
     if entity is None:
         entity = model.make_entity('Company')
         entity.make_id(data.get('opencorporates_url'))

     entity.add('name', data.get('name'))
     address = ensure_dict(data.get('registered_address'))
     entity.add('country', address.get('country'))
     # Aliases are added further down from each entry's ``company_name``;
     # the raw ``alternative_names`` entries are dicts and must not be
     # handed to the name property as they are.
     for prop, key in (
         ('jurisdiction', 'jurisdiction_code'),
         ('address', 'registered_address_in_full'),
         ('sourceUrl', 'registry_url'),
         ('legalForm', 'company_type'),
         ('incorporationDate', 'incorporation_date'),
         ('dissolutionDate', 'dissolution_date'),
         ('status', 'current_status'),
         ('registrationNumber', 'company_number'),
         ('opencorporatesUrl', 'opencorporates_url'),
     ):
         entity.add(prop, data.get(key))

     # Normalised like ``registered_address`` above, so an explicit null
     # ``source`` does not break the lookups below.
     source = ensure_dict(data.get('source'))
     entity.add('publisher', source.get('publisher'))
     entity.add('publisherUrl', source.get('url'))
     entity.add('retrievedAt', source.get('retrieved_at'))

     for code in ensure_list(data.get('industry_codes')):
         # Entries may be wrapped under an ``industry_code`` key.
         code = code.get('industry_code', code)
         entity.add('sector', code.get('description'))
     for previous in ensure_list(data.get('previous_names')):
         entity.add('previousName', previous.get('company_name'))
     for alias in ensure_list(data.get('alternative_names')):
         entity.add('alias', alias.get('company_name'))
     return entity
示例#13
0
def update_entity(collection, entity_id=None):
    """Refresh xref, profile fragments and the index for an edited entity.

    The entity is loaded from the index. For casefile collections it is
    cross-referenced. If its schema has a ``namesMentioned`` property, the
    names of the entities it refers to are stored as a ``names`` fragment
    before the aggregator is re-indexed for this entity.
    """
    from aleph.logic.xref import xref_entity
    from aleph.logic.profiles import profile_fragments

    log.info("[%s] Update entity: %s", collection, entity_id)
    proxy = model.get_proxy(index.get_entity(entity_id))
    if collection.casefile:
        xref_entity(collection, proxy)

    aggregator = get_aggregator(collection, origin=MODEL_ORIGIN)
    profile_fragments(collection, aggregator, entity_id=entity_id)

    # Inline name properties from adjacent entities. See the
    # docstring on `inline_names` for a more detailed discussion.
    names_prop = proxy.schema.get("namesMentioned")
    if names_prop is not None:
        neighbour_ids = proxy.get_type_values(registry.entity)
        names = set()
        for neighbour in index.entities_by_ids(neighbour_ids):
            neighbour_proxy = model.get_proxy(neighbour)
            names.update(neighbour_proxy.get_type_values(registry.name))

        if names:
            # The fragment carries only the collected names and shares the
            # ID of the edited entity.
            name_fragment = model.make_entity(proxy.schema)
            name_fragment.id = proxy.id
            name_fragment.add(names_prop, names)
            aggregator.put(name_fragment, fragment="names")

    index_aggregator(collection, aggregator, entity_ids=[entity_id])
    refresh_entity(collection, proxy.id)
示例#14
0
def transform(input):
    """Walk the triples parsed from ``input``, building one entity per subject.

    As written this is work in progress: entities are built but never
    returned or yielded, and every triple that is not a label is printed.
    """
    last_subject = None
    entity = None
    for (s, p, o) in parse_triples(input):
        subject_changed = s != last_subject
        if subject_changed and entity is not None:
            # Placeholder: the finished entity is not emitted anywhere yet.
            pass
        if subject_changed:
            last_subject = s
            # The ID is whatever follows the matching URI prefix.
            prefix = SPECIAL if s.startswith(SPECIAL) else ENTITY
            qid = s[len(prefix):]
            entity = model.make_entity("Thing")
            entity.make_id(qid)
            entity.add("wikidataId", qid)
            if s.startswith(ENTITY):
                entity.add("sourceUrl", str(s))

        if p in (SKOS.prefLabel, RDFS.label, SCHEMA.name):
            entity.add("name", str(o))
            continue

        print(s, p, o)

        if p == PROP.P31:
            # Placeholder: P31 is recognised but not handled yet.
            pass
 def add_stub(self, proxy_id, schema="Thing"):
     """Return the node for ``proxy_id``, adding a stub entity if unknown.

     When ``proxy_id`` is not yet contained in ``self``, an entity of
     ``schema`` with that ID is added and remembered in
     ``self._stub_proxies``.
     """
     placeholder = model.make_entity(schema)
     placeholder.id = proxy_id
     if proxy_id not in self:
         node, _ = self.add_proxy(placeholder)
         self._stub_proxies.add(placeholder.id)
         return node
     return self.get_node_by_proxy(placeholder)
示例#16
0
 def test_language_tagging(self):
     """French text: expect the tagged name and ``fra`` as the language."""
     entity = model.make_entity('PlainText')
     entity.add('bodyText', "C'est le caniche d'Emmanuel Macron. " * 2)
     analyze_entity(entity)
     tagged_names = entity.get_type_values(registry.name)
     assert "d'Emmanuel Macron" in tagged_names, tagged_names
     languages = entity.get('detectedLanguage')
     assert languages == ['fra'], languages
示例#17
0
 def to_proxy(self):
     """Return this record as an entity: a page if it has text, else a row.

     Row cells are the values of ``self.data`` ordered by sorting its
     items, packed through ``registry.json``.
     """
     is_page = self.text is not None
     schema = self.SCHEMA_PAGE if is_page else self.SCHEMA_ROW
     proxy = model.make_entity(schema)
     proxy.make_id('record', self.id)
     if is_page:
         proxy.set('document', self.document_id)
         proxy.set('index', self.index)
         proxy.set('bodyText', stringify(self.text))
         return proxy
     proxy.set('table', self.document_id)
     proxy.set('index', self.index)
     if self.data is not None:
         ordered = sorted(self.data.items())
         cells = [cell for (_, cell) in ordered]
         proxy.set('cells', registry.json.pack(cells))
     return proxy
示例#18
0
 def test_ner_extract(self):
     """A name repeated in German text shows up among the tagged names."""
     sentence = 'Das ist der Pudel von Angela Merkel. '
     document = model.make_entity('PlainText')
     document.add('bodyText', sentence * 5)
     analyze_entity(document)
     tagged_names = document.get_type_values(registry.name)
     assert 'Angela Merkel' in tagged_names, tagged_names
示例#19
0
def create_link(sources, targets):
    """Return an ``UnknownLink`` from every source to every target.

    The ID is derived from a random sample of eight ASCII letters. Each
    item of ``sources`` is added as ``subject`` and each item of
    ``targets`` as ``object``.
    """
    link = model.make_entity("UnknownLink")
    seed = random.sample(string.ascii_letters, 8)
    link.make_id(seed)
    for prop, endpoints in (("subject", sources), ("object", targets)):
        for endpoint in endpoints:
            link.add(prop, endpoint)
    return link
示例#20
0
文件: profiles.py 项目: sunu/aleph
def get_profile(entityset_id, authz=None):
    """Load a profile (an entity set) together with a merged view of it.

    The entity set's metadata and items are cached. When ``authz`` is
    given, items are limited to collections it may read. The entities of
    the remaining items are resolved and merged into a single proxy.

    Returns ``None`` when ``entityset_id`` is ``None`` or the entity set
    cannot be loaded. Otherwise returns the data dict, extended with
    ``proxies``, ``merged``, ``label`` and ``shallow``.
    """
    if entityset_id is None:
        return
    cache_key = cache.object_key(EntitySet, entityset_id)
    data = cache.get_complex(cache_key)
    stub = Stub()
    if data is None:
        entityset = get_entityset(entityset_id)
        if entityset is None:
            return
        data = entityset.to_dict()
        data["items"] = [member.to_dict() for member in entityset.items()]
        cache.set_complex(cache_key, data, expires=cache.EXPIRE)

    # Filter the subset of items the current user can access
    if authz is not None:
        readable = []
        for item in data["items"]:
            if authz.can(item["collection_id"], authz.READ):
                readable.append(item)
        data["items"] = readable

    # Only positively judged items are queued for resolution.
    for item in data["items"]:
        if Judgement(item["judgement"]) == Judgement.POSITIVE:
            resolver.queue(stub, Entity, item.get("entity_id"))
    resolver.resolve(stub)

    # Load the constituent entities and fold them into one combined proxy.
    merged = None
    data["proxies"] = []
    for item in data["items"]:
        resolved = resolver.get(stub, Entity, item.get("entity_id"))
        item["entity"] = resolved
        if resolved is None:
            continue
        proxy = model.get_proxy(resolved)
        proxy.context = {}
        data["proxies"].append(proxy)
        if merged is not None:
            merged.merge(proxy)
            merged.context["entities"].append(proxy.id)
        else:
            # The first resolved entity seeds the merged view.
            merged = proxy.clone()
            merged.context["entities"] = [proxy.id]

    if merged is None:
        merged = model.make_entity(Entity.LEGAL_ENTITY)

    # Polish it a bit:
    merged.id = data.get("id")
    merged = name_entity(merged)
    data["merged"] = merged
    data["label"] = merged.caption
    data["shallow"] = False
    return data
示例#21
0
    def test_base_functions(self):
        """Exercise the basic ``EntityProxy`` accessors and mutators."""
        # Imported locally so this block does not rely on a module-level
        # import being present.
        from copy import deepcopy

        # ``dict(ENTITY)`` would share the nested ``properties`` dict with
        # the module-level fixture, so adding the bogus property below would
        # leak into every other test that reads ENTITY.
        data = deepcopy(ENTITY)
        data["properties"]["banana"] = ["foo"]
        proxy = EntityProxy.from_dict(model, data)
        assert "test" in repr(proxy), repr(proxy)
        assert hash(proxy) == hash(proxy.id)
        assert proxy.get("name") == ["Ralph Tester"]
        assert proxy.first("name") == "Ralph Tester"
        prop = model.get_qname("Thing:name")
        assert proxy.get(prop) == ["Ralph Tester"]
        assert proxy.caption == "Ralph Tester"
        assert str(proxy) == "Ralph Tester"

        # None, empty strings and lists of empty strings must not add a
        # value; a dict is expected to add one.
        name = "Ralph the Great"
        proxy.add("name", name)
        assert len(proxy.get("name")) == 2
        proxy.add("name", None)
        assert len(proxy.get("name")) == 2
        proxy.add("name", "")
        assert len(proxy.get("name")) == 2
        proxy.add("name", [""])
        assert len(proxy.get("name")) == 2
        proxy.add("name", {"name": "banana"})
        assert len(proxy.get("name")) == 3, proxy.get("name")
        assert name in proxy.get("name")
        assert name in proxy.names, proxy.names

        # Unknown properties raise unless ``quiet`` is set.
        with raises(InvalidData):
            proxy.add("banana", "yellow")
        proxy.add("banana", "yellow", quiet=True)

        mem = model.make_entity("Membership")
        mem.id = "foo"
        with raises(InvalidData):
            proxy.add("directorshipDirector", mem)

        with raises(InvalidData):
            proxy.add("sameAs", proxy)

        with raises(InvalidData):
            proxy.get("banana")
        assert [] == proxy.get("banana", quiet=True)

        with raises(InvalidData):
            proxy.first("banana")
        assert proxy.first("banana", quiet=True) is None

        assert len(proxy.get("nationality")) == 0

        double = model.get_proxy(proxy)
        assert double == proxy

        proxy.add("banana", name, quiet=True)
        with raises(InvalidData):
            proxy.add("banana", name)

        with raises(InvalidData):
            EntityProxy.from_dict(model, {})
 def parse_notes(self, note):
     """Yield a single ``PlainText`` entity built from a note record."""
     entity = model.make_entity('PlainText')
     entity.make_id(self.project_id, note.get('id'))
     for prop, field in (('title', 'Title'),
                         ('summary', 'Summary'),
                         ('bodyText', 'Body')):
         entity.add(prop, self._field_values(note, field))
     for created in self._field_values(note, 'Creation'):
         entity.add('date', self.parse_timestamp(created))
     yield entity
示例#23
0
def _iter_mentions(collection):
    """Yield one pseudo-entity per run of mentions with the same resolved ID.

    Mentions are read sorted by ``properties.resolved``. Consecutive
    mentions that share a ``resolved`` value are folded into one entity
    that carries that value as its ID; these entities are used for xref.
    """
    current = model.make_entity(Entity.LEGAL_ENTITY)
    mentions = iter_proxies(
        collection_id=collection.id,
        schemata=["Mention"],
        sort={"properties.resolved": "desc"},
    )
    for mention in mentions:
        resolved_id = mention.first("resolved")
        if resolved_id != current.id:
            # A new run starts: flush the previous group unless it is the
            # initial placeholder, which has no ID.
            if current.id is not None:
                yield current
            current = model.make_entity(Entity.LEGAL_ENTITY)
            current.id = resolved_id
        _merge_schemata(current, mention.get("detectedSchema"))
        current.add("name", mention.get("name"))
        current.add("country", mention.get("contextCountry"))
    if current.id is not None:
        yield current
示例#24
0
 def test_pattern_extract(self):
     """A ``tel:`` number in the text yields a phone and its country."""
     document = model.make_entity('PlainText')
     document.add(
         'bodyText',
         "Mr. Flubby Flubber called the number tel:+919988111222 twice",
     )
     analyze_entity(document)
     found_phones = document.get_type_values(registry.phone)
     assert '+919988111222' in found_phones
     found_countries = document.get_type_values(registry.country)
     assert 'in' in found_countries
示例#25
0
 def test_ner_extract(self):
     """A name repeated in German text shows up among the tagged names."""
     sentence = "Das ist der Pudel von Angela Merkel. "
     document = model.make_entity("PlainText")
     document.id = "test1"
     document.add("bodyText", sentence * 5)
     tagged = self._tagged_entity(document)
     tagged_names = tagged.get_type_values(registry.name)
     assert "Angela Merkel" in tagged_names, tagged_names
示例#26
0
    def test_base_functions(self):
        """Exercise the basic ``EntityProxy`` accessors and mutators."""
        subject = EntityProxy.from_dict(model, ENTITY)
        assert 'test' in repr(subject), repr(subject)
        assert hash(subject) == hash(subject.id)
        assert subject.get('name') == ['Ralph Tester']
        assert subject.first('name') == 'Ralph Tester'
        name_prop = model.get_qname('Thing:name')
        assert subject.get(name_prop) == ['Ralph Tester']
        assert subject.caption == 'Ralph Tester'
        assert str(subject) == 'Ralph Tester'

        # After the first real alias, none of the following values is
        # expected to add another name.
        alias = 'Ralph the Great'
        subject.add('name', alias)
        assert len(subject.get('name')) == 2
        subject.add('name', None)
        assert len(subject.get('name')) == 2
        subject.add('name', '')
        assert len(subject.get('name')) == 2
        subject.add('name', [''])
        assert len(subject.get('name')) == 2
        subject.add('name', {'name': 'banana'}, cleaned=True)
        assert len(subject.get('name')) == 2
        assert alias in subject.get('name')
        assert alias in subject.names, subject.names

        # Unknown properties raise unless ``quiet`` is set.
        with assert_raises(InvalidData):
            subject.add('banana', 'yellow')
        subject.add('banana', 'yellow', quiet=True)

        membership = model.make_entity('Membership')
        membership.id = 'foo'
        with assert_raises(InvalidData):
            subject.add('directorshipDirector', membership)

        with assert_raises(InvalidData):
            subject.add('sameAs', subject)

        with assert_raises(InvalidData):
            subject.get('banana')
        assert [] == subject.get('banana', quiet=True)

        with assert_raises(InvalidData):
            subject.first('banana')
        assert subject.first('banana', quiet=True) is None

        assert len(subject.get('nationality')) == 0

        duplicate = EntityProxy.from_dict(model, subject)
        assert duplicate == subject

        subject.add('banana', alias, quiet=True)
        with assert_raises(InvalidData):
            subject.add('banana', alias)

        with assert_raises(InvalidData):
            EntityProxy.from_dict(model, {})
示例#27
0
def validate(infile, outfile):
    """Re-add each input entity's values to a fresh entity and write it out.

    Entities are read from ``infile`` with ``cleaned=False``. A broken
    output pipe aborts the command.
    """
    try:
        for raw in read_entities(infile, cleaned=False):
            checked = model.make_entity(raw.schema)
            checked.id = raw.id
            for pair in raw.itervalues():
                prop, value = pair
                checked.add(prop, value)
            write_object(outfile, checked)
    except BrokenPipeError:
        raise click.Abort()
示例#28
0
    def test_rdf(self):
        """Fixture: eight statements and two more triples; empty Person: none."""
        fixture = EntityProxy.from_dict(model, ENTITY)
        fixture_statements = list(fixture.statements)
        assert 8 == len(fixture_statements), len(fixture_statements)
        fixture_triples = list(fixture.triples)
        expected = len(fixture_statements) + 2
        assert expected == len(fixture_triples), len(fixture_triples)

        empty = model.make_entity('Person')
        empty_triples = list(empty.triples)
        assert 0 == len(empty_triples)
示例#29
0
 def test_pattern_extract(self):
     """A ``tel:`` number in the text yields a phone and its country."""
     document = model.make_entity("PlainText")
     document.id = "test3"
     document.add(
         "bodyText",
         "Mr. Flubby Flubber called the number tel:+919988111222 twice",
     )
     tagged = self._tagged_entity(document)
     found_phones = tagged.get_type_values(registry.phone)
     assert "+919988111222" in found_phones
     found_countries = tagged.get_type_values(registry.country)
     assert "in" in found_countries
示例#30
0
 def test_language_tagging(self):
     """French text: expect the tagged name and ``fra`` as the language."""
     document = model.make_entity("PlainText")
     document.id = "test2"
     document.add("bodyText", "C'est le caniche d'Emmanuel Macron. " * 2)
     tagged = self._tagged_entity(document)
     tagged_names = tagged.get_type_values(registry.name)
     assert "Emmanuel Macron" in tagged_names, tagged_names
     languages = tagged.get("detectedLanguage")
     assert languages == ["fra"], languages
示例#31
0
 def to_proxy(self):
     """Return this record as an entity: a page if it has text, else a row.

     Row cells follow the order of the ``columns`` list in the document's
     metadata and are packed through ``registry.json``.
     """
     if self.text is None:
         row = model.make_entity(self.SCHEMA_ROW)
         row.make_id('record', self.id)
         row.set('table', self.document_id)
         row.set('index', self.index)
         if self.data is not None:
             # sort values by columns
             columns = self.document.meta.get('columns')
             cells = [self.data.get(column) for column in columns]
             row.set('cells', registry.json.pack(cells))
         return row

     page = model.make_entity(self.SCHEMA_PAGE)
     page.make_id('record', self.id)
     page.set('document', self.document_id)
     page.set('index', self.index)
     page.set('bodyText', stringify(self.text))
     return page
示例#32
0
def reconcile_op(query, collection=None):
    """Run one reconciliation query and return its matches.

    A proxy is built from the query's ``type`` (defaulting to
    ``Entity.THING``), its ``query`` text as the name, and any listed
    ``properties``. When ``collection`` is given, the search is filtered
    to its ID. The result dict holds the matches under ``result`` and
    their count under ``num``.
    """
    log.info("Reconcile: %r", query)
    args = {'limit': query.get('limit', '5')}
    if collection is not None:
        args['filter:collection_id'] = collection.get('id')
    parser = SearchQueryParser(args, request.authz)

    candidate = model.make_entity(query.get('type') or Entity.THING)
    candidate.add('name', query.get('query'))
    for spec in query.get('properties', []):
        # Properties the schema does not know are skipped silently.
        candidate.add(spec.get('pid'), spec.get('v'), quiet=True)

    match_query = MatchQuery(parser, entity=candidate)
    matches = list(entity_matches(match_query.search()))
    return {'result': matches, 'num': len(matches)}