示例#1
0
文件: documents.py 项目: tomjie/aleph
def load_document(tx, document):
    """Merge a document, and the things it mentions, into node form.

    A ``DocumentNode`` is merged from the document's metadata and attached
    to the document's collections. Every e-mail address and phone number
    listed in the metadata is merged as its own node, linked from the
    document through ``MENTIONS`` and attached to the same collections.
    Finally, each entity reference whose origin is not ``'polyglot'`` is
    loaded through ``load_entity`` and linked through ``MENTIONS`` with the
    reference's weight.

    :param tx: handle forwarded unchanged to every merge/helper call
        (presumably a graph transaction -- confirm against the callers).
    :param document: object exposing ``meta``, ``type``, ``content_hash``,
        ``id``, ``collections`` and ``references``.
    :returns: the merged document node.
    """
    metadata = document.meta
    doc_node = DocumentNode.merge(tx,
                                  name=metadata.title,
                                  docType=document.type,
                                  fileName=metadata.file_name,
                                  fingerprint=document.content_hash,
                                  alephDocument=document.id)
    add_to_collections(tx, doc_node, document.collections)

    def link_contact(node_type, value):
        # The raw value doubles as both display name and fingerprint.
        contact = node_type.merge(tx, name=value, fingerprint=value)
        MENTIONS.merge(tx, doc_node, contact)
        add_to_collections(tx, contact, document.collections)

    for address in metadata.emails:
        link_contact(EmailNode, address)

    for number in metadata.phone_numbers:
        link_contact(PhoneNode, number)

    for ref in document.references:
        # References with a 'polyglot' origin are deliberately left out.
        if ref.origin != 'polyglot':
            entity_node = load_entity(tx, ref.entity)
            MENTIONS.merge(tx, doc_node, entity_node, weight=ref.weight)
    return doc_node
示例#2
0
def load_document(tx, document):
    """Load a document and everything it mentions into the graph.

    Does nothing (and returns ``None``) when no transaction is supplied.
    Otherwise the document is merged as a ``DocumentNode`` and attached to
    its collections; each e-mail address and phone number from the metadata
    is merged as a node, linked via ``MENTIONS`` and attached to the same
    collections; and every entity returned by ``Entity.all_by_document`` is
    loaded with ``load_entity`` and linked via ``MENTIONS``. All relations
    and collection links are tagged with the document id as
    ``alephDocument``.

    :param tx: transaction handle, or ``None`` to skip loading.
    :param document: object exposing ``id``, ``meta``, ``type`` and
        ``collections``.
    :returns: the merged document node, or ``None`` when ``tx`` is ``None``.
    """
    if tx is None:
        return

    doc_id = document.id
    metadata = document.meta
    log.info("Graph load [%s]: %r", doc_id, metadata)

    # NOTE(review): the document *type* is stored under the 'alephTitle'
    # key -- confirm that this property name is intended.
    properties = {
        'name': metadata.title,
        'alephTitle': document.type,
        'fileName': metadata.file_name,
        'fingerprint': doc_id,
        'alephDocument': doc_id
    }
    doc_node = DocumentNode.merge(tx, **properties)
    add_to_collections(tx, doc_node, document.collections,
                       alephDocument=doc_id)

    for address in metadata.emails:
        email_node = EmailNode.merge(tx, name=address, fingerprint=address)
        MENTIONS.merge(tx, doc_node, email_node, alephDocument=doc_id)
        add_to_collections(tx, email_node, document.collections,
                           alephDocument=doc_id)

    for number in metadata.phone_numbers:
        phone_node = PhoneNode.merge(tx, name=number, fingerprint=number)
        MENTIONS.merge(tx, doc_node, phone_node, alephDocument=doc_id)
        add_to_collections(tx, phone_node, document.collections,
                           alephDocument=doc_id)

    for entity in Entity.all_by_document(doc_id):
        entity_node = load_entity(tx, entity)
        MENTIONS.merge(tx, doc_node, entity_node,
                       alephDocument=doc_id,
                       alephEntity=entity.id)
    return doc_node
示例#3
0
def load_document(tx, document):
    """Write a document and its mentions to the graph within ``tx``.

    Returns ``None`` immediately when ``tx`` is ``None``. Otherwise merges
    a ``DocumentNode`` for the document, attaches it to the document's
    collections, merges and links (via ``MENTIONS``) one node per e-mail
    address and phone number found in the metadata, and links every entity
    yielded by ``Entity.all_by_document`` after loading it with
    ``load_entity``. Each relation and collection link carries the document
    id in its ``alephDocument`` property.

    :param tx: transaction handle, or ``None`` to skip loading.
    :param document: object exposing ``id``, ``meta``, ``type`` and
        ``collections``.
    :returns: the merged document node, or ``None`` when ``tx`` is ``None``.
    """
    if tx is None:
        return

    doc_id = document.id
    metadata = document.meta
    log.info("Graph load [%s]: %r", doc_id, metadata)

    # NOTE(review): 'alephTitle' receives the document type, not a title --
    # confirm that this property name is intended.
    doc_node = DocumentNode.merge(tx, name=metadata.title,
                                  alephTitle=document.type,
                                  fileName=metadata.file_name,
                                  fingerprint=doc_id, alephDocument=doc_id)

    def attach(target):
        # Tie a node to the document's collections, tagged with the doc id.
        add_to_collections(tx, target, document.collections,
                           alephDocument=doc_id)

    def mention(target, **extra):
        # Relate the document node to `target`, tagged with the doc id.
        MENTIONS.merge(tx, doc_node, target, alephDocument=doc_id, **extra)

    attach(doc_node)

    for address in metadata.emails:
        email_node = EmailNode.merge(tx, name=address, fingerprint=address)
        mention(email_node)
        attach(email_node)

    for number in metadata.phone_numbers:
        phone_node = PhoneNode.merge(tx, name=number, fingerprint=number)
        mention(phone_node)
        attach(phone_node)

    for entity in Entity.all_by_document(doc_id):
        mention(load_entity(tx, entity), alephEntity=entity.id)
    return doc_node
示例#4
0
def load_document(tx, document):
    """Load a document, its contacts and its active entities into the graph.

    Returns ``None`` without doing anything when ``tx`` is ``None``.
    Otherwise a ``DocumentNode`` is merged (fingerprinted by the document's
    content hash) and attached to the document's collections. Each e-mail
    address and phone number in the metadata is merged as a node, linked
    via ``MENTIONS`` and attached to the same collections. For every
    reference whose entity is in ``Entity.STATE_ACTIVE``, the entity is
    loaded through ``load_entity`` and linked via ``MENTIONS`` with the
    reference weight and entity id. Relations and collection links are
    tagged with the document id as ``alephDocument``.

    :param tx: transaction handle, or ``None`` to skip loading.
    :param document: object exposing ``id``, ``meta``, ``type``,
        ``content_hash``, ``collections`` and ``references``.
    :returns: the merged document node, or ``None`` when ``tx`` is ``None``.
    """
    if tx is None:
        return

    doc_id = document.id
    metadata = document.meta
    log.info("Graph load [%s]: %r", doc_id, metadata)

    doc_node = DocumentNode.merge(tx,
                                  name=metadata.title,
                                  alephType=document.type,
                                  fileName=metadata.file_name,
                                  fingerprint=document.content_hash,
                                  alephDocument=doc_id)
    add_to_collections(tx, doc_node, document.collections,
                       alephDocument=doc_id)

    def link_contact(node_type, value):
        # The raw value doubles as both display name and fingerprint.
        contact = node_type.merge(tx, name=value, fingerprint=value)
        MENTIONS.merge(tx, doc_node, contact, alephDocument=doc_id)
        add_to_collections(tx, contact, document.collections,
                           alephDocument=doc_id)

    for address in metadata.emails:
        link_contact(EmailNode, address)

    for number in metadata.phone_numbers:
        link_contact(PhoneNode, number)

    for ref in document.references:
        # Only entities in the active state are linked to the document.
        if ref.entity.state == Entity.STATE_ACTIVE:
            entity_node = load_entity(tx, ref.entity)
            MENTIONS.merge(tx, doc_node, entity_node,
                           weight=ref.weight,
                           alephDocument=doc_id,
                           alephEntity=ref.entity_id)
    return doc_node