示例#1
0
def test_arxiv_to_article():
    """Check that arxiv records convert to articles completely and fast enough.

    Copies up to ``N`` documents from the ``myxiv.arxiv`` collection into a
    scratch ``testdb`` database, runs ``transforms.arxiv_to_article`` over
    them through ``transforms.mongo_map``, and then asserts that:

    * the conversion ran faster than ``min_hz`` records per second,
    * ``mongo_map`` returned an empty list, and
    * ``testdb.article`` holds as many documents as ``testdb.arxiv``.

    Requires a MongoDB server on 127.0.0.1:27017 and drops the ``arxiv`` and
    ``article`` collections of ``testdb`` before running.
    """
    N = 100
    min_hz = 20

    # Copy up to N values from arxiv.
    myxiv = mongo_conn().myxiv
    records = list(myxiv.arxiv.find().limit(N))
    # Fail with a clear message instead of an opaque driver error on an
    # empty insert further down.
    assert records, "source arxiv collection returned no records to test with"

    testdb = schema.connect("testdb", host="127.0.0.1", port=27017)
    testdb.drop_collection("arxiv")
    testdb.drop_collection("article")
    # NOTE(review): Collection.insert()/count() are legacy driver calls;
    # confirm they exist in the driver version this project pins.
    testdb.arxiv.insert(records)

    # Try to import them all as articles, timing the whole pass.
    # NOTE(review): the meaning of the second argument (True) is defined by
    # transforms.arxiv_to_article, which is not visible here.
    t0 = time.time()
    failed = transforms.mongo_map(
        lambda x: transforms.arxiv_to_article(x, True), testdb.arxiv.find()
    )
    dt = time.time() - t0

    # Rate is based on the number of records actually processed (the source
    # may hold fewer than N); a zero elapsed time on a coarse clock counts
    # as infinitely fast rather than raising ZeroDivisionError.
    rate = len(records) / dt if dt > 0 else float("inf")
    assert rate > min_hz, "%.1f rec/sec too slow, min is %d" % (rate, min_hz)

    # Check they all made it.
    assert failed == [], "expected no failed records, got %r" % (failed,)
    arxiv_count = testdb.arxiv.count()
    article_count = testdb.article.count()
    assert article_count == arxiv_count, "arxiv count %d, got article count %d" % (
        arxiv_count,
        article_count,
    )
示例#2
0
def write_mongo(db, tree):
    """Convert every OAI record in ``tree`` to an article and save it.

    Connects with ``schema.connect(db)``, then for each record yielded by
    ``arxiv.oai_records(tree)`` builds an article via
    ``transforms.arxiv_to_article`` and calls its ``validate()`` and
    ``save()`` methods.

    Errors raised by ``validate()`` or ``save()`` are printed and the loop
    moves on to the next record, so one bad record does not abort the run.
    Errors raised while iterating or converting a record are not caught
    here and propagate to the caller.

    Args:
        db: Passed unchanged to ``schema.connect``.
        tree: Passed unchanged to ``arxiv.oai_records``.
    """
    schema.connect(db)
    for r in arxiv.oai_records(tree):
        art = transforms.arxiv_to_article(r)
        try:
            art.validate()
            art.save()
        except Exception as e:
            # Best-effort import: report the failure and keep going.
            # print(e) with a single argument behaves the same on Python 2
            # and Python 3, unlike the Python 2-only `print e` statement.
            print(e)