def test_downloadDataDictionary():
    ctx = reload.context()
    reload.downloadDataDictionary(ctx)
    assert filecmp.cmp(ctx["dataDictionaryInputFilePath"], "redcap/metadata.json")
    os.remove(ctx["dataDictionaryInputFilePath"])

def test_back_up_data_dictionary():
    ctx = reload.context()
    with copy_file("redcap/metadata.json", ctx["dataDictionaryInputFilePath"]):
        assert reload.backUpDataDictionary(ctx)
        directory = reload.dataDictionaryBackUpDirectory(ctx)
        shutil.rmtree(directory)

def test_back_up_data_dictionary_not_exists():
    ctx = reload.context()
    assert reload.backUpDataDictionary(ctx)
    directory = reload.dataDictionaryBackUpDirectory(ctx)
    assert not os.path.exists(ctx["dataDictionaryInputFilePath"])
    assert not os.path.exists(directory)

def test_update_table_column():
    ctx = reload.context()
    fn = "/tmp/ssd1.csv"
    fn2 = "/tmp/ssd2.csv"
    csv1 = [[i, i] for i in range(10)]
    csv2 = [[i, i + 1] for i in range(1, 11)]
    write_csv(fn, ["ProposalID", "siteNumber"], csv1)
    write_csv(fn2, ["ProposalID", "siteNumber"], csv2)
    try:
        reload._updateDataIntoTableColumn(ctx, "SiteInformation", "ProposalID", fn, {})
        rows = reload.readDataFromTable(ctx, "SiteInformation")
        assert bag_contains(rows, [{
            "siteNumber": str(row[1]),
            "ProposalID": str(row[0])
        } for row in csv1])
        reload._updateDataIntoTableColumn(ctx, "SiteInformation", "ProposalID", fn2, {})
        rows = reload.readDataFromTable(ctx, "SiteInformation")
        csv2_ids = [row[0] for row in csv2]
        assert bag_contains(rows, [{
            "siteNumber": str(row[1]),
            "ProposalID": str(row[0])
        } for row in csv1 if row[0] not in csv2_ids] + [{
            "siteNumber": str(row[1]),
            "ProposalID": str(row[0])
        } for row in csv2])
    finally:
        reload.clearDatabase(ctx)
        reload.createTables(ctx)
        os.unlink(fn)
        os.unlink(fn2)

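# The helpers write_csv and bag_contains used above are defined elsewhere in
# the suite. The sketches below are only illustrative assumptions about their
# behavior, inferred from how the tests call them; they are not the project's
# actual implementations, hence the _sketch names.
def _write_csv_sketch(path, headers, rows):
    # Assumed: write a header row followed by the data rows.
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(rows)

def _bag_contains_sketch(actual, expected):
    # Assumed: multiset containment -- every expected dict matches a distinct
    # actual row on all of its keys, ignoring row order and extra columns.
    remaining = list(actual)
    for exp in expected:
        match = next((row for row in remaining
                      if all(row.get(k) == v for k, v in exp.items())), None)
        if match is None:
            return False
        remaining.remove(match)
    return True
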
def do_test_post_table(verb1, verb2, src, cnttype, tablename, kvp1, kvp2,
                       content1, content2, has_comments=False):
    print("cwd =", os.getcwd())
    ctx = reload.context()
    pServer = Process(target=server.server, args=[ctx], kwargs={})
    pServer.start()
    time.sleep(WAIT_PERIOD)
    pWorker = Process(target=reload.startWorker)
    pWorker.start()
    time.sleep(WAIT_PERIOD)
    try:
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        assert resp.json() == []
        print("post " + tablename)
        resp = do_request_table(verb1, tablename, kvp1, src, cnttype, has_comments=has_comments)
        print(resp.text)
        assert resp.status_code == 200
        taskid = resp.json()
        assert isinstance(taskid, str)
        wait_for_task_to_finish(taskid)
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        assert bag_contains(resp.json(), content1)
        print("post " + tablename)
        resp = do_request_table(verb2, tablename, kvp2, src, cnttype, has_comments=has_comments)
        assert resp.status_code == 200
        taskid = resp.json()
        assert isinstance(taskid, str)
        wait_for_task_to_finish(taskid)
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        assert bag_contains(resp.json(), content2)
    finally:
        pWorker.terminate()
        pServer.terminate()
        reload.clearTasks()
        reload.clearDatabase(ctx)
        reload.createTables(ctx)

def test_get_column_data_type_twice():
    ctx = reload.context()
    dt = reload.getColumnDataType(ctx, "SiteInformation", "ProposalID")
    assert dt == "bigint"
    dt = reload.getColumnDataType(ctx, "SiteInformation", "ProposalID")
    assert dt == "bigint"

def wait_for_task_to_start(taskid):
    resp = requests.get("http://localhost:5000/task/" + taskid)
    print(resp.json())
    while resp.json()["status"] == "queued":
        time.sleep(1)
        resp = requests.get("http://localhost:5000/task/" + taskid)
        print(resp.json())

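# wait_for_task_to_finish (used by the request tests in this file) is defined
# elsewhere in the suite. This is a minimal sketch of the assumed behavior,
# mirroring the polling pattern of wait_for_task_to_start above; "queued" and
# "started" are the RQ job states the task passes through before completion.
def _wait_for_task_to_finish_sketch(taskid):
    resp = requests.get("http://localhost:5000/task/" + taskid)
    print(resp.json())
    while resp.json()["status"] in ["queued", "started"]:
        time.sleep(1)
        resp = requests.get("http://localhost:5000/task/" + taskid)
        print(resp.json())
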
def test_restore_database_with_lock():
    print("test_restore_database_with_lock")
    ctx = reload.context()
    with database(ctx, cleanup=True):
        ts = test_back_up_database(False)
    with database(ctx, cleanup=True):
        assert reload.restoreDatabase(ctx, ts)
    os.remove(ctx["backupDir"] + "/" + ts)

def test_etl():
    ctx = reload.context()
    with copy_file("redcap/record.json", ctx["dataInputFilePath"]):
        with copy_file("redcap/metadata.json", ctx["dataDictionaryInputFilePath"]):
            with datatables(lambda: reload.etl(ctx)) as ret:
                assert ret
                assert os.path.isfile("/data/tables/Proposal")
                with open("/data/tables/Proposal") as f:
                    assert sum(1 for _ in f) == 2

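# copy_file and datatables are context-manager fixtures defined elsewhere in
# the suite. These are illustrative sketches of the assumed semantics: the
# cleanup behavior is inferred from the other tests, which remove the same
# paths by hand after running the ETL.
from contextlib import contextmanager

@contextmanager
def _copy_file_sketch(src, dst):
    # Stage a fixture file at dst for the duration of the block, then remove it.
    shutil.copy(src, dst)
    try:
        yield
    finally:
        os.remove(dst)

@contextmanager
def _datatables_sketch(thunk):
    # Run an ETL step that writes /data/tables, yield its result, then clean up.
    try:
        yield thunk()
    finally:
        shutil.rmtree("/data/tables", ignore_errors=True)
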
def test_delete_back_up_database():
    print("test_delete_back_up_database")
    test_sync(False)
    ctx = reload.context()
    with database(ctx, cleanup=True):
        ts = str(datetime.datetime.now())
        assert reload._backUpDatabase(ctx, ts)
        assert reload._deleteBackup(ctx, ts)
        assert ts not in os.listdir(ctx["backupDir"])

def test_clear_database():
    ctx = reload.context()
    reload.clearDatabase(ctx)
    engine = create_engine("postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}".format(
        ctx["dbuser"], ctx["dbpass"], ctx["dbhost"], ctx["dbport"], ctx["dbname"]))
    conn = engine.connect()
    rs = conn.execute(
        "SELECT table_schema, table_name FROM information_schema.tables "
        "WHERE table_schema = 'public' ORDER BY table_schema, table_name"
    ).fetchall()
    assert len(rs) == 0
    conn.close()
    reload.createTables(ctx)

def test_start_worker():
    p = Process(target=reload.startWorker)
    workers = Worker.all(connection=reload.redisQueue())
    assert len(list(workers)) == 0
    p.start()
    time.sleep(WAIT_PERIOD)
    workers = Worker.all(connection=reload.redisQueue())
    assert len(list(workers)) == 1
    p.terminate()

def test_get_all_tasks():
    ctx = reload.context()
    pServer = Process(target=server.server, args=[ctx], kwargs={})
    print("starting server ctx = " + str(ctx))
    pServer.start()
    print("server started, waiting for " + str(WAIT_PERIOD))
    time.sleep(WAIT_PERIOD)
    print("clearing tasks")
    reload.clearTasks()
    print("clearing database")
    reload.clearDatabase(ctx)
    print("creating tables")
    reload.createTables(ctx)
    print("starting worker")
    pWorker = Process(target=reload.startWorker)
    pWorker.start()
    print("worker started, waiting for " + str(WAIT_PERIOD))
    time.sleep(WAIT_PERIOD)
    print("set up")
    try:
        resp0 = requests.get("http://localhost:5000/task")
        assert len(resp0.json()["queued"]) == 0
        resp1 = requests.post("http://localhost:5000/sync")
        task_id = resp1.json()
        wait_for_task_to_start(task_id)
        resp2 = requests.get("http://localhost:5000/task")
        assert resp2.json() == {
            "queued": [],
            "started": {"job_ids": [task_id], "expired_job_ids": []},
            "finished": {"job_ids": [], "expired_job_ids": []},
            "failed": {"job_ids": [], "expired_job_ids": []},
            "deferred": {"job_ids": [], "expired_job_ids": []}
        }
    finally:
        pWorker.terminate()
        pServer.terminate()
        reload.clearTasks()
        reload.clearDatabase(ctx)
        reload.createTables(ctx)

def test_back_up_database_with_lock(cleanup=True):
    print("test_back_up_database_with_lock")
    test_sync(False)
    ctx = reload.context()
    with database(ctx, cleanup=cleanup):
        ts = str(datetime.datetime.now())
        assert reload.backUpDatabase(ctx, ts)
        assert ts in os.listdir(ctx["backupDir"])
        if cleanup:
            os.remove(ctx["backupDir"] + "/" + ts)
        else:
            return ts

def test_back_up_endpoint():
    ctx = reload.context()
    p = Process(target=server.server, args=[ctx], kwargs={})
    p.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp = requests.get("http://localhost:5000/backup")
        assert resp.status_code == 200
        print(resp.json())
        assert isinstance(resp.json(), list)
    finally:
        p.terminate()
        reload.clearTasks()

def do_test_table(table_name, columns):
    ctx = reload.context()
    conn = connect(user=ctx["dbuser"], password=ctx["dbpass"], host=ctx["dbhost"],
                   port=ctx["dbport"], dbname=ctx["dbname"])
    conn.autocommit = True
    cur = conn.cursor()
    cur.execute('''SELECT * FROM "{0}"'''.format(table_name))
    cur.fetchall()
    colnames = [desc[0] for desc in cur.description]
    for column in columns:
        assert column in colnames
    conn.close()

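# Example use of the helper above, with the table and column names that the
# CSV tests in this file exercise:
#
#     do_test_table("SiteInformation", ["ProposalID", "siteNumber"])
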
def test_downloadData_deepdiff():
    # Compare the downloaded data to the fixture semantically (DeepDiff)
    # rather than byte-for-byte.
    ctx = reload.context()
    reload.downloadData(ctx)
    try:
        with open(ctx["dataInputFilePath"]) as f:
            obj = json.load(f)
        with open("redcap/record.json") as f2:
            obj2 = json.load(f2)
        diff = DeepDiff(obj, obj2)
        assert len(diff) == 0
    except AssertionError:
        sys.stderr.write(str(diff) + "\n")
        raise
    os.remove(ctx["dataInputFilePath"])

def do_test_auxiliary(aux1, exp):
    aux0 = os.environ.get("AUXILIARY_PATH")
    os.environ["AUXILIARY_PATH"] = aux1
    ctx = reload.context()
    shutil.copy("redcap/record.json", ctx["dataInputFilePath"])
    shutil.copy("redcap/metadata.json", ctx["dataDictionaryInputFilePath"])
    assert reload.etl(ctx)
    with open("/data/tables/ProposalFunding") as f:
        i = f.readline().split(",").index("totalBudgetInt")
        assert f.readline().split(",")[i] == exp
    os.remove(ctx["dataInputFilePath"])
    os.remove(ctx["dataDictionaryInputFilePath"])
    shutil.rmtree("/data/tables")
    if aux0 is None:
        del os.environ["AUXILIARY_PATH"]
    else:
        os.environ["AUXILIARY_PATH"] = aux0

def test_get_column_data_type_twice2():
    ctx = reload.context()
    fn = "/tmp/ssd1.csv"
    csv1 = [[i, i] for i in range(10)]
    write_csv(fn, ["ProposalID", "siteNumber"], csv1)
    try:
        dt = reload.getColumnDataType(ctx, "SiteInformation", "ProposalID")
        assert dt == "bigint"
        reload._updateDataIntoTable(ctx, "SiteInformation", fn, {})
        dt = reload.getColumnDataType(ctx, "SiteInformation", "ProposalID")
        assert dt == "bigint"
    finally:
        reload.clearDatabase(ctx)
        reload.createTables(ctx)
        os.unlink(fn)

def do_test_blocklist2(blocklist1, exp):
    blocklist0 = os.environ.get("BLOCK_PATH")
    os.environ["BLOCK_PATH"] = blocklist1
    ctx = reload.context()
    shutil.copy("redcap/record2.json", ctx["dataInputFilePath"])
    shutil.copy("redcap/metadata.json", ctx["dataDictionaryInputFilePath"])
    assert reload.etl(ctx)
    with open("/data/tables/Proposal", newline="") as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        i = sum(1 for row in reader)
        assert i == exp
    os.remove(ctx["dataInputFilePath"])
    os.remove(ctx["dataDictionaryInputFilePath"])
    shutil.rmtree("/data/tables")
    if blocklist0 is None:
        del os.environ["BLOCK_PATH"]
    else:
        os.environ["BLOCK_PATH"] = blocklist0

def do_test_insert_table(src, kvp, has_comments=False):
    ctx = reload.context()
    n = countrows(src, "text/csv") - (1 if has_comments else 0)
    try:
        reload.insertDataIntoTable(ctx, "SiteInformation", src, kvp)
        rows = reload.readDataFromTable(ctx, "SiteInformation")
        assert bag_contains(rows, [{
            "siteNumber": str(i), **kvp
        } for i in range(1, n + 1)])
        reload.insertDataIntoTable(ctx, "SiteInformation", src, kvp)
        rows = reload.readDataFromTable(ctx, "SiteInformation")
        assert bag_contains(rows, [{
            "siteNumber": str(i), **kvp
        } for i in range(1, n + 1)] * 2)
    finally:
        reload.clearDatabase(ctx)
        reload.createTables(ctx)

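# countrows(src, content_type) is defined elsewhere in the suite. This is an
# illustrative sketch assuming it counts the data rows (excluding the header)
# of a CSV payload, which is consistent with how n is used above.
def _countrows_sketch(src, cnttype):
    if cnttype == "text/csv":
        with open(src, newline="") as f:
            return sum(1 for _ in csv.reader(f)) - 1  # subtract the header row
    raise ValueError("unsupported content type: " + cnttype)
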
def test_sync(cleanup=True):
    ctx = reload.context()
    with database(ctx, cleanup=cleanup):
        with connection(ctx, autocommit=True) as conn:
            cur = conn.cursor()
            cur.execute('''SELECT COUNT(*) FROM "Proposal"''')
            rs = cur.fetchall()
            assert len(rs) == 1
            for row in rs:
                assert row[0] == 0
            with copytree("/etlout", "/data/tables"):
                print("sync database")
                assert reload.syncDatabase(ctx)
                cur.execute('''SELECT COUNT(*) FROM "Proposal"''')
                rs = cur.fetchall()
                assert len(rs) == 1
                for row in rs:
                    assert row[0] == 1
                print("database synced")

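# connection and copytree are fixtures defined elsewhere in the suite. These
# are illustrative sketches of the assumed semantics: connection mirrors the
# explicit connect() call in do_test_table above, and copytree is assumed to
# remove the copy on exit, matching the manual shutil.rmtree calls elsewhere.
@contextmanager
def _connection_sketch(ctx, autocommit=False):
    conn = connect(user=ctx["dbuser"], password=ctx["dbpass"], host=ctx["dbhost"],
                   port=ctx["dbport"], dbname=ctx["dbname"])
    conn.autocommit = autocommit
    try:
        yield conn
    finally:
        conn.close()

@contextmanager
def _copytree_sketch(src, dst):
    shutil.copytree(src, dst)
    try:
        yield
    finally:
        shutil.rmtree(dst, ignore_errors=True)
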
def test_entrypoint():
    ctx = reload.context()
    with database(ctx):
        with connection(ctx, autocommit=True) as conn:
            cur = conn.cursor()
            cur.execute('''SELECT COUNT(*) FROM "Proposal"''')
            rs = cur.fetchall()
            assert len(rs) == 1
            for row in rs:
                assert row[0] == 0
            ctx["reloaddb"] = False
            with copy_file("redcap/record.json", ctx["dataInputFilePath"]):
                with copy_file("redcap/metadata.json", ctx["dataDictionaryInputFilePath"]):
                    with datatables(lambda: reload.entrypoint(ctx, one_off=True)):
                        cur.execute('''SELECT COUNT(*) FROM "Proposal"''')
                        rs = cur.fetchall()
                        assert len(rs) == 1
                        for row in rs:
                            assert row[0] == 1

def do_test_post_error(verb1, src, cnttype, tablename, kvp1, status_code, resp_text):
    ctx = reload.context()
    pServer = Process(target=server.server, args=[ctx], kwargs={})
    pServer.start()
    time.sleep(WAIT_PERIOD)
    pWorker = Process(target=reload.startWorker)
    pWorker.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp = do_request_table(verb1, tablename, kvp1, src, cnttype)
        assert resp.status_code == status_code
        assert re.match(resp_text, resp.text)
    finally:
        pWorker.terminate()
        pServer.terminate()
        reload.clearTasks()
        reload.clearDatabase(ctx)
        reload.createTables(ctx)

def test_task():
    ctx = reload.context()
    p = Process(target=server.server, args=[ctx], kwargs={})
    p.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp0 = requests.get("http://localhost:5000/task")
        assert len(resp0.json()["queued"]) == 0
        requests.post("http://localhost:5000/backup")
        resp2 = requests.get("http://localhost:5000/task")
        assert "queued" in resp2.json()
        assert len(resp2.json()["queued"]) == 1
        for status in ["started", "finished", "failed", "deferred"]:
            assert status in resp2.json()
            for category in ["job_ids", "expired_job_ids"]:
                assert category in resp2.json()[status]
                assert len(resp2.json()[status][category]) == 0
    finally:
        p.terminate()
        reload.clearTasks()

def test_get_task():
    ctx = reload.context()
    p = Process(target=server.server, args=[ctx], kwargs={})
    p.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp = requests.post("http://localhost:5000/backup")
        resp2 = requests.get("http://localhost:5000/task/" + resp.json())
        for field in ["name", "created_at", "ended_at", "started_at",
                      "enqueued_at", "description", "status", "result"]:
            assert field in resp2.json()
    finally:
        p.terminate()
        reload.clearTasks()

def test_delete_task():
    ctx = reload.context()
    p = Process(target=server.server, args=[ctx], kwargs={})
    p.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp0 = requests.get("http://localhost:5000/task")
        assert len(resp0.json()["queued"]) == 0
        resp = requests.post("http://localhost:5000/sync")
        resp1 = requests.post("http://localhost:5000/sync")
        resp2 = requests.get("http://localhost:5000/task")
        assert len(resp2.json()["queued"]) == 2
        assert resp.json() in resp2.json()["queued"]
        assert resp1.json() in resp2.json()["queued"]
        requests.delete("http://localhost:5000/task/" + resp1.json())
        resp3 = requests.get("http://localhost:5000/task")
        assert len(resp3.json()["queued"]) == 1
        assert resp.json() in resp3.json()["queued"]
        assert resp1.json() not in resp3.json()["queued"]
    finally:
        p.terminate()
        reload.clearTasks()

def test_downloadData():
    ctx = reload.context()
    reload.downloadData(ctx)
    assert filecmp.cmp(ctx["dataInputFilePath"], "redcap/record.json")
    os.remove(ctx["dataInputFilePath"])

import os
from multiprocessing import Process

import reload
import server

if __name__ == "__main__":
    ctx = reload.context()
    s = os.environ["RELOAD_SCHEDULE"] == "1"
    o = os.environ["RELOAD_ONE_OFF"] == "1"
    cdb = os.environ["CREATE_TABLES"] == "1"
    idb = os.environ["INSERT_DATA"] == "1"
    scheduleRunTime = os.environ["SCHEDULE_RUN_TIME"]
    runServer = os.environ["SERVER"] == "1"
    p2 = Process(target=reload.startWorker)
    p2.start()
    p = Process(target=reload.entrypoint, args=[ctx], kwargs={
        "create_tables": cdb,
        "insert_data": idb,
        "reload": s,
        "one_off": o,
        "schedule_run_time": scheduleRunTime
    })
    p.start()
    if runServer:
        server.server(ctx)
    p.join()
    p2.join()

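# Example invocation of the entrypoint script above (hypothetical values and
# script name; the script reads exactly these environment variables at startup):
#
#   RELOAD_SCHEDULE=0 RELOAD_ONE_OFF=1 CREATE_TABLES=1 INSERT_DATA=0 \
#   SCHEDULE_RUN_TIME=00:00 SERVER=1 python <this-script>.py
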
def test_post_table_column():
    ctx = reload.context()
    fn = "/tmp/ssd1.csv"
    fn2 = "/tmp/ssd2.csv"
    csv1 = [[i, i] for i in range(10)]
    csv2 = [[i, i + 1] for i in range(1, 11)]
    write_csv(fn, ["ProposalID", "siteNumber"], csv1)
    write_csv(fn2, ["ProposalID", "siteNumber"], csv2)
    tablename = "SiteInformation"
    column = "ProposalID"
    kvp1 = kvp2 = {}
    cnttype = "text/csv"
    verb1 = verb2 = requests.post
    content1 = [{
        "siteNumber": str(row[1]),
        "ProposalID": str(row[0])
    } for row in csv1]
    csv2_ids = [row[0] for row in csv2]
    content2 = [{
        "siteNumber": str(row[1]),
        "ProposalID": str(row[0])
    } for row in csv1 if row[0] not in csv2_ids] + [{
        "siteNumber": str(row[1]),
        "ProposalID": str(row[0])
    } for row in csv2]
    pServer = Process(target=server.server, args=[ctx], kwargs={})
    pServer.start()
    time.sleep(WAIT_PERIOD)
    pWorker = Process(target=reload.startWorker)
    pWorker.start()
    time.sleep(WAIT_PERIOD)
    try:
        resp = do_request_table_column(verb1, tablename, column, kvp1, fn, cnttype)
        assert resp.status_code == 200
        taskid = resp.json()
        assert isinstance(taskid, str)
        wait_for_task_to_finish(taskid)
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        assert bag_contains(resp.json(), content1)
        print("post " + tablename)
        resp = do_request_table_column(verb2, tablename, column, kvp2, fn2, cnttype)
        assert resp.status_code == 200
        taskid = resp.json()
        assert isinstance(taskid, str)
        wait_for_task_to_finish(taskid)
        print("get " + tablename)
        resp = requests.get("http://localhost:5000/table/" + tablename)
        assert bag_contains(resp.json(), content2)
    finally:
        pWorker.terminate()
        pServer.terminate()
        reload.clearTasks()
        reload.clearDatabase(ctx)
        reload.createTables(ctx)