Пример #1
0
def _check_preassembly_with_database(num_stmts, batch_size):
    """Build a preassembled corpus in a test database and sanity-check it.

    The database is obtained from ``get_pa_loaded_db(num_stmts)``; a
    ``PreassemblyManager`` with the given ``batch_size`` then runs
    ``create_corpus`` on it. Afterwards the raw statements, preassembled
    (pa) statements, raw-unique links and support links are read back and
    checked for consistency, and the result is compared against
    old-fashioned preassembly (opa) via ``_check_against_opa_stmts``.

    Parameters
    ----------
    num_stmts :
        Forwarded to ``get_pa_loaded_db`` (presumably the number of raw
        statements to load -- confirm against that helper).
    batch_size :
        Forwarded to ``pm.PreassemblyManager``.

    Raises
    ------
    AssertionError
        If any of the consistency checks fails.
    """
    db = get_pa_loaded_db(num_stmts)

    # Now test the set of preassembled (pa) statements from the database
    # against what we get from old-fashioned preassembly (opa). The opa
    # inputs are collected before the corpus is created.
    opa_inp_stmts = _get_opa_input_stmts(db)

    # Get the set of raw statements.
    raw_stmt_list = db.select_all(db.RawStatements)
    all_raw_ids = {raw_stmt.id for raw_stmt in raw_stmt_list}
    assert len(raw_stmt_list)

    # Run the preassembly initialization.
    start = datetime.now()
    pa_manager = pm.PreassemblyManager(batch_size=batch_size, print_logs=True)
    pa_manager.create_corpus(db)
    end = datetime.now()
    print("Duration:", end - start)

    # Make sure the number of pa statements is within reasonable bounds.
    pa_stmt_list = db.select_all(db.PAStatements)
    assert 0 < len(pa_stmt_list) < len(raw_stmt_list)

    # Check the evidence links.
    raw_unique_link_list = db.select_all(db.RawUniqueLinks)
    assert len(raw_unique_link_list)
    all_link_ids = {ru.raw_stmt_id for ru in raw_unique_link_list}
    all_link_mk_hashes = {ru.pa_stmt_mk_hash for ru in raw_unique_link_list}

    # Every linked raw statement id must belong to a known raw statement.
    # (Compare by value/emptiness: ``is 0`` tests object identity and only
    # works by accident of CPython's small-int caching.)
    unknown_link_ids = all_link_ids - all_raw_ids
    assert not unknown_link_ids, unknown_link_ids

    # Every pa statement must be referenced by at least one link.
    assert all(pa_stmt.mk_hash in all_link_mk_hashes
               for pa_stmt in pa_stmt_list)

    # Check the support links.
    sup_links = db.select_all([
        db.PASupportLinks.supporting_mk_hash,
        db.PASupportLinks.supported_mk_hash
    ])
    assert sup_links
    assert not any(link[0] == link[1] for link in sup_links),\
        "Found self-support in the database."

    # Try to get all the preassembled statements from the table.
    pa_stmts = db_client.get_statements([],
                                        preassembled=True,
                                        db=db,
                                        with_support=True)
    assert len(pa_stmts) == len(pa_stmt_list), (len(pa_stmts),
                                                len(pa_stmt_list))

    # No statement object may list itself (by hash) among the statements
    # it supports or is supported by.
    assert not any(
        shash(s) in {shash(s_) for s_ in s.supported_by + s.supports}
        for s in pa_stmts
    ), "Found self-support in constructed pa statement objects."

    _check_against_opa_stmts(db, opa_inp_stmts, pa_stmts)
Пример #2
0
def _get_opa_input_stmts(db):
    """Gather the statements to feed into old-fashioned preassembly (opa).

    Combines the filtered reading statements obtained through ``db_util``
    with the raw statements whose ``reading_id`` is NULL, fetched through
    ``db_client``. Prints how many statements were collected.

    Parameters
    ----------
    db :
        Database handle passed to the ``db_util`` and ``db_client`` helpers.

    Returns
    -------
    The union of both groups of statements.
    """
    # Statements that came from readings, after filtering.
    rdg_stmt_dict = db_util.get_reading_stmt_dict(db, get_full_stmts=True)
    from_readings, _ = db_util.get_filtered_rdg_stmts(
        rdg_stmt_dict,
        get_full_stmts=True,
    )

    # Statements that have no associated reading.
    no_reading_clause = db.RawStatements.reading_id.is_(None)
    without_reading = db_client.get_statements(
        [no_reading_clause],
        preassembled=False,
        db=db,
    )

    opa_inputs = from_readings | set(without_reading)
    print("Got %d statements for opa." % len(opa_inputs))
    return opa_inputs
Пример #3
0
def test_get_statements():
    """Exercise ``dbc.get_statements`` on raw statements with several filters.

    Checks, in order: an unfiltered query returns all ``num_stmts``
    statements; statements with a reading carry pmids for which metadata
    can be fetched; excluding one uuid returns one statement fewer; and a
    SPARSER-only query with ``fix_refs=False`` yields at least one
    statement whose first evidence has no pmid.
    """
    num_stmts = 10000
    db, _ = _get_prepped_db(num_stmts)
    raw = db.RawStatements

    # Test getting all statements
    everything = dbc.get_statements([], preassembled=False, db=db)
    assert len(everything) == num_stmts, len(everything)

    # Statements that came from a reading: sample some and look up pmids.
    from_readings = dbc.get_statements(
        [raw.reading_id.isnot(None)],
        preassembled=False,
        db=db,
    )
    sampled = random.sample(from_readings, 100)
    pmids = {stmt.evidence[0].pmid for stmt in sampled}
    assert pmids
    assert pmids != {None}
    md_list = pubc.get_metadata_for_ids(
        [pmid for pmid in pmids if pmid is not None])
    n_real_pmids = len(pmids - {None})
    assert len(md_list) == n_real_pmids, (len(md_list), n_real_pmids)

    # Test getting some statements
    excluded_uuid = from_readings[0].uuid
    all_but_one = dbc.get_statements(
        [raw.uuid != excluded_uuid],
        preassembled=False,
        db=db,
    )
    assert len(all_but_one) == num_stmts - 1, len(all_but_one)

    # Test getting statements without fix refs.
    sparser_clauses = [
        raw.reading_id.isnot(None),
        raw.reading_id == db.Reading.id,
        db.Reading.reader == 'SPARSER',
    ]
    sparser_stmts = dbc.get_statements(
        sparser_clauses,
        preassembled=False,
        fix_refs=False,
        db=db,
    )
    assert 0 < len(sparser_stmts) < num_stmts, len(sparser_stmts)
    pmids = {stmt.evidence[0].pmid
             for stmt in random.sample(sparser_stmts, 200)}
    assert None in pmids, pmids
Пример #4
0
def _check_db_pa_supplement(num_stmts, batch_size, split=0.8, n_proc=1):
    """Run an incremental preassembly update and compare it against opa.

    A ``PreassemblyManager`` is created and handed to ``_get_loaded_db``
    together with ``num_stmts`` and ``split``. After a short pause,
    ``supplement_corpus`` is run on the database and the resulting
    preassembled statements are checked with ``_check_against_opa_stmts``.

    Parameters
    ----------
    num_stmts :
        Forwarded to ``_get_loaded_db``.
    batch_size :
        Forwarded to ``pm.PreassemblyManager``.
    split :
        Forwarded to ``_get_loaded_db`` (presumably the fraction of
        statements loaded before the supplement -- confirm against that
        helper).
    n_proc :
        Forwarded to ``pm.PreassemblyManager``.
    """
    pa_manager = pm.PreassemblyManager(batch_size=batch_size, n_proc=n_proc,
                                       print_logs=True)
    db = _get_loaded_db(num_stmts, split=split, pam=pa_manager)
    opa_inp_stmts = _get_opa_input_stmts(db)
    print('sleeping...')
    sleep(5)
    print("Beginning supplement...")

    # Start timing only after the pause so the reported duration covers
    # the supplement itself rather than the fixed sleep.
    start = datetime.now()
    pa_manager.supplement_corpus(db)
    end = datetime.now()
    print("Duration of incremental update:", end-start)

    pa_stmts = db_client.get_statements([], preassembled=True, db=db,
                                        with_support=True)
    _check_against_opa_stmts(db, opa_inp_stmts, pa_stmts)