def test_arxiv_to_article():
    """Import a sample of arxiv records as articles; check speed and coverage.

    Copies up to ``N`` documents from the ``myxiv`` arxiv collection into the
    ``testdb`` database at 127.0.0.1:27017 (dropping its ``arxiv`` and
    ``article`` collections first), maps ``transforms.arxiv_to_article`` over
    the copied documents with ``transforms.mongo_map``, and asserts that:

    * the import ran faster than ``min_hz`` records per second,
    * ``mongo_map`` returned an empty list of failures, and
    * the ``article`` collection holds as many documents as ``arxiv``.
    """
    N = 100
    min_hz = 20

    # copy N values from arxiv
    myxiv = mongo_conn().myxiv
    records = list(myxiv.arxiv.find().limit(N))
    # The source collection may hold fewer than N documents, so the throughput
    # check below is based on what was actually copied, not on N.
    count = len(records)
    assert count > 0, "no arxiv records available to copy into testdb"

    testdb = schema.connect("testdb", host="127.0.0.1", port=27017)
    testdb.drop_collection("arxiv")
    testdb.drop_collection("article")
    testdb.arxiv.insert(records)

    # try to import them all as articles
    t0 = time.time()
    failed = transforms.mongo_map(
        lambda x: transforms.arxiv_to_article(x, True),
        testdb.arxiv.find(),
    )
    dt = time.time() - t0
    # A zero elapsed time (coarse clock) means "faster than measurable", not
    # an error, so avoid dividing by it.
    rate = count / dt if dt > 0 else float("inf")
    assert rate > min_hz, (rate, " rec/sec too slow, min is ", min_hz)

    # Check they all made it
    assert failed == [], ("expected no failed records, got ", failed)
    n_arxiv = testdb.arxiv.count()
    n_article = testdb.article.count()
    assert n_article == n_arxiv, (
        "arxiv count ",
        n_arxiv,
        ", got article count ",
        n_article,
    )
def write_mongo(db, tree):
    """Convert each OAI record in *tree* to an article and save it.

    Calls ``schema.connect(db)`` and then, for every record yielded by
    ``arxiv.oai_records(tree)``, builds an article with
    ``transforms.arxiv_to_article``, validates it and saves it.

    A record whose ``validate()`` or ``save()`` raises is reported on stdout
    and skipped, so the remaining records are still processed. An exception
    raised by ``arxiv_to_article`` itself is not caught here and propagates.

    Args:
        db: Passed unchanged to ``schema.connect``.
        tree: Passed unchanged to ``arxiv.oai_records``.
    """
    schema.connect(db)
    for r in arxiv.oai_records(tree):
        art = transforms.arxiv_to_article(r)
        try:
            art.validate()
            art.save()
        except Exception as e:
            # Deliberate best-effort: one bad record must not abort the run.
            # Parenthesized form prints the same text on Python 2 and also
            # parses on Python 3.
            print(e)