def load_entity(tx, entity):
    """Merge an entity and its alternative names into the graph.

    The entity name is fingerprinted and looked up with
    ``EntityNode.get_cache``; when that returns a node it is returned
    immediately and nothing else is written. Otherwise a node is merged for
    the entity, one alias node is merged per distinct other name and linked
    to the main node through ``AKA``, and each merged node is added to the
    entity's collections.

    :param tx: graph transaction handed through to every merge call.
    :param entity: object exposing ``id``, ``name``, ``state``,
        ``collections`` and ``other_names`` (items with ``display_name``).
    :returns: the node for the entity itself (never an alias node).
    """
    log.info("Load node [%s]: %s", entity.id, entity.name)
    fp = fingerprint(entity.name)
    node = EntityNode.get_cache(tx, fp)
    if node is not None:
        return node

    node = EntityNode.merge(tx, name=entity.name, fingerprint=fp,
                            alephState=entity.state, alephEntity=entity.id)
    add_to_collections(tx, node, entity.collections)

    # Fingerprints already turned into nodes; starts with the main name so an
    # alias identical to it is not merged a second time.
    seen = {fp}
    for other_name in entity.other_names:
        alias_fp = fingerprint(other_name.display_name)
        # Check the computed value, not the ``fingerprint`` helper itself:
        # names that yield no fingerprint must not become alias nodes.
        if alias_fp is None or alias_fp in seen:
            continue
        seen.add(alias_fp)
        alias = EntityNode.merge(tx, name=other_name.display_name,
                                 fingerprint=alias_fp,
                                 alephEntity=entity.id, isAlias=True)
        AKA.merge(tx, node, alias)
        # The alias (not the main node, which was added above) joins the
        # entity's collections.
        add_to_collections(tx, alias, entity.collections)
    # TODO contact details, addresses
    return node
def update(self, tx, row):
    """Bind ``row`` to node properties and merge the resulting node.

    A node returned by ``self.type.get_cache`` for the row's fingerprint
    property is handed back as-is. Otherwise the node is merged from the
    bound properties and, when the merge yields a node, added to the
    mapping's collection. Returns the node, or ``None`` when the merge
    produced nothing.
    """
    properties = self.bind_properties(row)
    fingerprint_value = properties.get(self.type.fingerprint)

    cached = self.type.get_cache(tx, fingerprint_value)
    if cached is not None:
        return cached

    merged = self.type.merge(tx, **properties)
    if merged is None:
        return None

    # Imported at call time, only once there is a node to file.
    from aleph.graph.collections import add_to_collections
    add_to_collections(tx, merged, [self.mapping.collection])
    return merged
def load_entity(tx, entity):
    """Load an active entity and its aliases into the graph.

    Does nothing (returns ``None``) without a transaction. An entity whose
    state is not ``Entity.STATE_ACTIVE`` is passed to ``remove_entity`` and
    that call's result is returned. Otherwise the node found by
    ``EntityNode.get_cache`` is returned when there is one; failing that, a
    node is merged for the entity plus one alias node per distinct,
    non-empty other-name fingerprint, each linked with ``AKA`` and added to
    the entity's collections. The entity's own node is returned.
    """
    if tx is None:
        return
    if entity.state != Entity.STATE_ACTIVE:
        return remove_entity(tx, entity.id)

    log.info("Graph node [%s]: %s", entity.id, entity.name)
    primary_fp = entity.fingerprint
    cached = EntityNode.get_cache(tx, primary_fp)
    if cached is not None:
        return cached

    # Upper-cased jurisdiction code; not referenced again in this function.
    jurisdiction = entity.jurisdiction_code
    if jurisdiction is not None:
        jurisdiction = jurisdiction.upper()

    primary = EntityNode.merge(tx,
                               name=entity.name,
                               fingerprint=primary_fp,
                               alephSchema=entity.type,
                               alephState=entity.state,
                               alephEntity=entity.id)
    add_to_collections(tx, primary, entity.collections,
                       alephEntity=entity.id,
                       alephCanonical=entity.id)

    # Fingerprints that already have a node, seeded with the main one.
    known = {primary_fp}
    for other in entity.other_names:
        alias_fp = other.fingerprint
        if alias_fp is None or alias_fp in known:
            continue
        known.add(alias_fp)
        alias = EntityNode.merge(tx,
                                 name=other.display_name,
                                 fingerprint=alias_fp,
                                 alephEntity=entity.id,
                                 alephSchema=entity.type)
        AKA.merge(tx, primary, alias, alephEntity=entity.id)
        add_to_collections(tx, alias, entity.collections,
                           alephEntity=entity.id)
    # TODO contact details, addresses
    return primary
def load_document(tx, document):
    """Merge a document node together with everything it mentions.

    Creates the document node from the document's metadata, then merges a
    node for each e-mail address and phone number in the metadata and for
    each referenced entity whose reference origin is not ``'polyglot'``.
    Every such node is linked from the document with ``MENTIONS``; the
    document, e-mail and phone nodes are also added to the document's
    collections. Returns the document node.
    """
    meta = document.meta
    doc_node = DocumentNode.merge(tx,
                                  name=meta.title,
                                  docType=document.type,
                                  fileName=meta.file_name,
                                  fingerprint=document.content_hash,
                                  alephDocument=document.id)
    add_to_collections(tx, doc_node, document.collections)

    def mention(node_type, value):
        # The raw value serves as both the display name and the fingerprint.
        target = node_type.merge(tx, name=value, fingerprint=value)
        MENTIONS.merge(tx, doc_node, target)
        add_to_collections(tx, target, document.collections)

    for address in meta.emails:
        mention(EmailNode, address)
    for number in meta.phone_numbers:
        mention(PhoneNode, number)

    for ref in document.references:
        if ref.origin == 'polyglot':
            continue
        entity_node = load_entity(tx, ref.entity)
        MENTIONS.merge(tx, doc_node, entity_node, weight=ref.weight)
    return doc_node
def load_document(tx, document):
    """Load a document and the things it mentions into the graph.

    Returns ``None`` straight away when no transaction is given. Otherwise
    merges the document node (fingerprinted by the document id), then a
    node for every e-mail address and phone number in the metadata and for
    every entity returned by ``Entity.all_by_document``. Each of those is
    linked from the document through ``MENTIONS``; all relationships and
    collection memberships are tagged with ``alephDocument``. Returns the
    document node.
    """
    if tx is None:
        return
    log.info("Graph load [%s]: %r", document.id, document.meta)

    meta = document.meta
    properties = {
        'name': meta.title,
        'alephTitle': document.type,
        'fileName': meta.file_name,
        'fingerprint': document.id,
        'alephDocument': document.id
    }
    doc_node = DocumentNode.merge(tx, **properties)
    add_to_collections(tx, doc_node, document.collections,
                       alephDocument=document.id)

    def mention(node_type, value):
        # The raw value serves as both the display name and the fingerprint.
        target = node_type.merge(tx, name=value, fingerprint=value)
        MENTIONS.merge(tx, doc_node, target, alephDocument=document.id)
        add_to_collections(tx, target, document.collections,
                           alephDocument=document.id)

    for address in meta.emails:
        mention(EmailNode, address)
    for number in meta.phone_numbers:
        mention(PhoneNode, number)

    for entity in Entity.all_by_document(document.id):
        entity_node = load_entity(tx, entity)
        MENTIONS.merge(tx, doc_node, entity_node,
                       alephDocument=document.id,
                       alephEntity=entity.id)
    return doc_node
def load_document(tx, document):
    """Load a document and its references into the graph.

    Returns ``None`` straight away when no transaction is given. Otherwise
    merges the document node (fingerprinted by its content hash), then a
    node for every e-mail address and phone number in the metadata, and
    loads every referenced entity whose state is ``Entity.STATE_ACTIVE``.
    Each of those is linked from the document through ``MENTIONS``, with
    entity links carrying the reference weight. Returns the document node.
    """
    if tx is None:
        return
    log.info("Graph load [%s]: %r", document.id, document.meta)

    meta = document.meta
    doc_node = DocumentNode.merge(tx,
                                  name=meta.title,
                                  alephType=document.type,
                                  fileName=meta.file_name,
                                  fingerprint=document.content_hash,
                                  alephDocument=document.id)
    add_to_collections(tx, doc_node, document.collections,
                       alephDocument=document.id)

    def link_contact(node_type, value):
        # The raw value serves as both the display name and the fingerprint.
        contact = node_type.merge(tx, name=value, fingerprint=value)
        MENTIONS.merge(tx, doc_node, contact, alephDocument=document.id)
        add_to_collections(tx, contact, document.collections,
                           alephDocument=document.id)

    for address in meta.emails:
        link_contact(EmailNode, address)
    for number in meta.phone_numbers:
        link_contact(PhoneNode, number)

    for ref in document.references:
        # Only entities in the active state are loaded and linked.
        if ref.entity.state != Entity.STATE_ACTIVE:
            continue
        entity_node = load_entity(tx, ref.entity)
        MENTIONS.merge(tx, doc_node, entity_node,
                       weight=ref.weight,
                       alephDocument=document.id,
                       alephEntity=ref.entity_id)
    return doc_node