Пример #1
0
def many_server(request, tempdir):
    """Start a ZEO test server and fill its storage with 3001 ``Page`` objects.

    The data set consists of:

    * 2000 pages titled ``hello <i>`` with an identical filler text,
    * 1000 pages titled ``hello <i>`` whose text repeats one sentence
      ``int(sqrt(i))`` times, so the terms stay the same while the document
      length (and therefore the relevance score) varies,
    * one ``extra page`` with unrelated text.

    :param request: passed straight through to ``do_zeo_server``.
    :param tempdir: passed straight through to ``do_zeo_server``.
    :return: whatever ``do_zeo_server`` returned (the value the client
        connected to).
    """
    server_sock = do_zeo_server(request, tempdir, name="many_server", fsname='many.fs')
    client = zerodb.testing.db(None, server_sock)

    def store(title, text):
        # Single place where pages are created and handed to the database.
        client.add(Page(title=title, text=text))

    with transaction.manager:
        for idx in range(2000):
            store("hello %s" % idx, "lorem ipsum dolor sit amet" * 2)
        for idx in range(1000):
            # Same vocabulary, different length -> different scores per page.
            repeats = int(idx ** 0.5)
            store("hello %s" % idx, "this is something we're looking for" * repeats)
        store("extra page", "something else is here")

    # The populating client is no longer needed once the data is committed.
    client.disconnect()
    return server_sock
Пример #2
0
def _to_utf8_bytes(value):
    """Return ``value`` as UTF-8 encoded bytes; byte strings pass through as-is."""
    if isinstance(value, bytes):
        return value
    return value.encode("utf8")


def extract_from_db_to_file_system(min_page_len=500):
    """Dump pages from the database into numbered Markdown files.

    Pages are fetched in batches of 1000 through ``Page.get_pages`` until an
    empty batch comes back.  Every page is written to
    ``<config.data_base_path>/temp/<counter>.md`` (counter starts at 1) with
    the title on the first line followed by the page content.  Both fields
    are written as UTF-8.

    :param min_page_len: forwarded unchanged to ``Page.get_pages``
        (presumably a minimum page length filter -- confirm against that
        method).
    :return: None
    """
    folder = "%s/temp" % config.data_base_path
    # isdir() is already False for a missing path, so one check is enough.
    if not os.path.isdir(folder):
        os.makedirs(folder)
    p = Page()
    batch_size = 1000
    counter = 1
    offset = 0
    batch_result = p.get_pages(offset, batch_size, min_page_len)
    while len(batch_result) > 0:
        for r in batch_result:
            file_path = "%s/%s.md" % (folder, counter)
            # Binary mode plus explicit encoding keeps title and content
            # consistent; the context manager closes the file even when a
            # write fails.
            with open(file_path, "wb") as f:
                f.write(_to_utf8_bytes(r.page_title))
                f.write(b"\n")
                f.write(_to_utf8_bytes(r.page_content))
            if counter % 100 == 0:
                print("extract %d th page now" % counter)
            counter += 1
        offset += batch_size
        batch_result = p.get_pages(offset, batch_size, min_page_len)
Пример #3
0
def new_book_page(bid, p_name, p_content):
    """Create a new ``Page`` for a book and commit it through ``db_session``.

    :param bid: value stored in the page's ``book_id`` field.
    :param p_name: value stored in the page's ``page_name`` field.
    :param p_content: value stored in the page's ``page_content`` field.
    :return: None
    """
    # Take the timestamp once: two separate now() calls would leave a freshly
    # created page with ``updated`` a few microseconds later than ``created``.
    now = datetime.now()
    nbpage = Page(page_name=p_name,
                  page_content=p_content,
                  created=now,
                  updated=now,
                  book_id=bid)
    db_session.add(nbpage)
    db_session.commit()
Пример #4
0
def test_reindex(db):
    """Check that explicit reindexing replaces stale full-text index entries.

    A page's text is changed several times; after each reindex call the old
    marker word must no longer match and the new one must match.
    """
    def matches(term):
        # Number of pages whose "text" field contains ``term``.
        return len(db[Page].query(Contains("text", term)))

    with transaction.manager:
        first = Page(
            title="hello",
            text="Quick0 brown lazy fox jumps over lorem  ipsum dolor sit amet"
        )
        first_id = db.add(first)
    assert matches("quick0") == 1

    # Reindex through the model accessor, addressing the page by its id.
    with transaction.manager:
        first.text = "Quick1 brown lazy fox jumps over well, you know"
        db[Page].reindex(first_id)
    assert matches("quick0") == 0
    assert matches("quick1") == 1

    # Reindex through the model accessor, passing the object itself.
    with transaction.manager:
        first.text = "quick2 brown lazy fox jumps over well, you know"
        db[Page].reindex(first)
    assert matches("quick1") == 0
    assert matches("quick2") == 1

    # Same call form once more with new text.
    with transaction.manager:
        first.text = "quick3 brown lazy fox jumps over well, you know"
        db[Page].reindex(first)
    assert matches("quick2") == 0
    assert matches("quick3") == 1

    # Reindex several objects at once through the database object.
    with transaction.manager:
        second = Page(
            title="hello",
            text="Quick4 brown lazy fox jumps over lorem  ipsum dolor sit amet"
        )
        db.add(second)

    with transaction.manager:
        first.text = "quick5 brown lazy fox jumps over well, you know"
        second.text = "quick5 brown lazy fox jumps over well, you know"
        db.reindex([first, second])
    stale = db[Page].query(
        Contains("text", "quick3") | Contains("text", "quick4"))
    assert len(stale) == 0
    assert matches("quick5") == 2
Пример #5
0
def test_add(db):
    """Adding one page must cost fewer than 22 storage downloads.

    The storage's ``_debug_download_count`` is sampled before and after the
    ``db.add`` call inside the transaction block; the page is removed again
    afterwards.
    """
    with transaction.manager:
        downloads_before = db._storage._debug_download_count
        new_page = Page(title="hello", text="Quick brown lazy fox jumps over lorem  ipsum dolor sit amet")
        db.add(new_page)
        downloads_after = db._storage._debug_download_count

    request_count = downloads_after - downloads_before
    print("Number of requests:", request_count)
    assert request_count < 22

    # Clean up so the added page does not leak into other tests.
    with transaction.manager:
        db.remove(new_page)
Пример #6
0
def test_all_uid(db):
    """Check that ``Page`` objects expose ``_p_uid`` however they are loaded.

    After adding a page, the test repeatedly clears the storage cache and
    sweeps the connection cache, then loads objects through ``all()``,
    through single-uid lookup, through a list-of-uids lookup, and finally
    after modifying and committing one of them.
    """
    # Test for https://gist.github.com/micxjo/a097698b33fc4669b0b4
    page = Page(title="Test page", text="Hello world")
    with transaction.manager:
        db.add(page)

    # Drop the local reference before clearing the caches below.
    del page
    # Clear in-memory and on-disk caches
    db._storage._cache.clear()
    db._connection._cache.full_sweep()

    # 1) Objects obtained by iterating over all().
    for item in db[Page].all():
        assert hasattr(item, "_p_uid")
        del item

    db._storage._cache.clear()
    db._connection._cache.full_sweep()

    # 2) Objects fetched one at a time by uid.
    for uid in db[Page].all_uids():
        obj = db[Page][uid]
        assert hasattr(obj, "_p_uid")
        del obj

    db._storage._cache.clear()
    db._connection._cache.full_sweep()

    # 3) Objects fetched in bulk by passing a list of (at most 10) uids.
    uids = list(islice(db[Page].all_uids(), 10))
    objs = db[Page][uids]
    for obj in objs:
        assert hasattr(obj, "_p_uid")

    # 4) An object that is explicitly activated.
    objs = list(db[Page].all())
    obj = objs[0]
    obj._p_activate()
    assert hasattr(obj, "_p_uid")

    # 5) Objects after one of them was modified and the change committed.
    # NOTE(review): indexing objs[1] requires at least two pages in the
    # database; only one is added here, so the rest must come from the
    # ``db`` fixture -- confirm.
    objs[1].text += " xxx"
    transaction.commit()
    for obj in objs:
        assert hasattr(obj, "_p_uid")
Пример #7
0
        # page info
        # Walk over every page link found under "div#content div.prevws";
        # page numbers are assigned sequentially, starting at 1, in the order
        # the links are returned.
        page_number = 1
        for page_link in driver.find_elements_by_css_selector(
            "div#content div.prevws a"
        ):
            page_url = page_link.get_attribute("href")
            # Look for an already stored page with the same URL.
            page = session.query(Page).filter(Page.url == page_url).first()
            page_text = get_page_text(
                journal_title, issue_date, issue_text, page_number
            )
            # A child element with class "treffer" marks the page as a hit;
            # its absence (NoSuchElementException) means no hit.
            try:
                page_link.find_element_by_class_name("treffer")
                hit = True
            except NoSuchElementException:
                hit = False
            if page:
                # Existing record: the text is overwritten only when the
                # update option is set.
                # NOTE(review): ``hit`` is not refreshed for existing pages --
                # confirm this is intended.
                if args.update:
                    page.text = page_text
            else:
                page = Page(issue.issue_id, page_number, page_text, hit, page_url)
            # Runs for both new and existing pages.
            session.add(page)
            logger.debug(
                f"Page info extracted. Number: {page_number}, page url: {page_url} and page text: {page_text[:10] if page_text else None}"
            )
            page_number += 1
        # Commit once after all page links of this pass were processed.
        session.commit()

    # Release the database session and the browser driver, then log the
    # elapsed time measured from ``t1`` (set outside this excerpt).
    session.close()
    driver.quit()
    logger.info(f"Completed. Processing took {(datetime.now() - t1).seconds}s.")
Пример #8
0
def test_auto_reindex(db):
    """Check that attribute changes are reindexed automatically on commit.

    Covers: a single changed object, several changed objects, multiple
    changes to one object within one transaction (a single reindex call),
    switching auto-reindex off and on, changing an object that was never
    added, and changing an object before versus after ``db.add``.
    """
    def text_hits(term):
        # Number of pages whose "text" field contains ``term``.
        return len(db[Page].query(Contains("text", term)))

    def title_hits(value):
        # Number of pages whose "title" field equals ``value``.
        return len(db[Page].query(Eq("title", value)))

    with transaction.manager:
        first = Page(title="hello", text="autoreindex0, test whether to work")
        db.add(first)
    assert text_hits("autoreindex0") == 1

    # Changing the text inside a transaction is enough; no reindex call.
    with transaction.manager:
        first.text = "autoreindex1, test whether to work"
    assert text_hits("autoreindex0") == 0
    assert text_hits("autoreindex1") == 1

    with transaction.manager:
        second = Page(title="hello", text="autoreindex2, test whether to work")
        db.add(second)

    # Two objects changed in the same transaction.
    with transaction.manager:
        first.text = "autoreindex3, test whether to work"
        second.text = "autoreindex3, test whether to work"
    stale = db[Page].query(Contains("text", "autoreindex1") | Contains("text", "autoreindex2"))
    assert len(stale) == 0
    assert text_hits("autoreindex3") == 2

    # Several changes to one object in one transaction -> one reindex call.
    with mock.patch("zerodb.db.DbModel.reindex_one") as reindex_spy:
        with transaction.manager:
            first.text = "autoreindex3, test whether to work1"
            first.text = "autoreindex3, test whether to work2"
            first.text = "autoreindex3, test whether to work3"
        assert reindex_spy.call_count == 1

    # With auto-reindex switched off the index keeps the old terms.
    db.enableAutoReindex(False)
    with transaction.manager:
        first.text = "autoreindex4, test whether to work"
    assert text_hits("autoreindex3") == 2
    assert text_hits("autoreindex4") == 0

    # Modifying an object that was never added must not raise and must not
    # show up in the index.
    db.enableAutoReindex(True)
    with transaction.manager:
        fresh = Page(title="helloworld", text="autoreindex5, test whether to work")
        fresh.title = "helloworld1"
    assert title_hits("helloworld") == 0
    assert title_hits("helloworld1") == 0

    # Changes made before db.add() do not trigger a reindex.
    with mock.patch("zerodb.db.DbModel.reindex_one") as reindex_spy:
        with transaction.manager:
            fresh = Page(title="helloworld", text="autoreindex5, test whether to work")
            fresh.title = "helloworld1"
            db.add(fresh)
        assert reindex_spy.call_count == 0

    # Changes made after db.add() are reindexed.
    with transaction.manager:
        fresh = Page(title="helloworld", text="autoreindex6, test whether to work")
        db.add(fresh)
        fresh.title = "helloworld1"
        fresh.text = "autoreindex7, test whether to work"
    assert title_hits("helloworld") == 0
    assert title_hits("helloworld1") == 2
    assert text_hits("autoreindex6") == 0
    assert text_hits("autoreindex7") == 1

    # Same scenario again, this time counting the reindex calls.
    with mock.patch("zerodb.db.DbModel.reindex_one") as reindex_spy:
        with transaction.manager:
            fresh = Page(title="helloworld", text="autoreindex6, test whether to work")
            db.add(fresh)
            fresh.title = "helloworld1"
            fresh.text = "autoreindex7, test whether to work"
        assert reindex_spy.call_count == 1