def test_should_load_and_select_using_udf_video(self): # Equality test select_query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \ WHERE DummyObjectDetector(data).label = ['person'] ORDER BY id;" actual_batch = execute_query_fetch_all(select_query) expected = [{ 'myvideo.id': i * 2, 'dummyobjectdetector.label': ['person'] } for i in range(NUM_FRAMES // 2)] expected_batch = Batch(frames=pd.DataFrame(expected)) self.assertEqual(actual_batch, expected_batch) # Contain test select_query = "SELECT id, DummyObjectDetector(data) FROM MyVideo \ WHERE DummyObjectDetector(data).label <@ ['person'] ORDER BY id;" actual_batch = execute_query_fetch_all(select_query) self.assertEqual(actual_batch, expected_batch) select_query = "SELECT id FROM MyVideo WHERE \ DummyMultiObjectDetector(data).labels @> ['person'] ORDER BY id;" actual_batch = execute_query_fetch_all(select_query) expected = [{'myvideo.id': i} for i in range(0, NUM_FRAMES, 3)] expected_batch = Batch(frames=pd.DataFrame(expected)) self.assertEqual(actual_batch, expected_batch)
def test_select_and_where_video_in_table(self): select_query = "SELECT id,data FROM MyVideo WHERE id = 5;" actual_batch = execute_query_fetch_all(select_query) expected_batch = list(create_dummy_batches(filters=[5]))[0] self.assertEqual(actual_batch, expected_batch) select_query = "SELECT data FROM MyVideo WHERE id = 5;" actual_batch = execute_query_fetch_all(select_query) expected_rows = [{ "myvideo.data": np.array(np.ones((2, 2, 3)) * float(5 + 1) * 25, dtype=np.uint8) }] expected_batch = Batch(frames=pd.DataFrame(expected_rows)) self.assertEqual(actual_batch, expected_batch) select_query = "SELECT id, data FROM MyVideo WHERE id >= 2;" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_batch = list( create_dummy_batches(filters=range(2, NUM_FRAMES)))[0] self.assertEqual(actual_batch, expected_batch) select_query = "SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5;" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_batch = list(create_dummy_batches(filters=range(2, 5)))[0] self.assertEqual(actual_batch, expected_batch)
def test_should_mat_view_to_the_same_table(self): materialized_query = """CREATE MATERIALIZED VIEW IF NOT EXISTS dummy_view2 (id, label) AS SELECT id, DummyObjectDetector(data).label FROM MyVideo WHERE id < 5; """ execute_query_fetch_all(materialized_query) materialized_query = """CREATE MATERIALIZED VIEW IF NOT EXISTS dummy_view2 (id, label) AS SELECT id, DummyObjectDetector(data).label FROM MyVideo WHERE id >= 5; """ execute_query_fetch_all(materialized_query) select_query = 'SELECT id, label FROM dummy_view2;' actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() labels = DummyObjectDetector().labels expected = [{ 'dummy_view2.id': i, 'dummy_view2.label': labels[1 + i % 2] } for i in range(5)] expected_batch = Batch(frames=pd.DataFrame(expected)) self.assertEqual(actual_batch, expected_batch)
def setUpClass(cls): CatalogManager().reset() copy_sample_video_to_prefix() query = """LOAD DATA INFILE 'ua_detrac.mp4' INTO MyVideo;""" execute_query_fetch_all(query) load_inbuilt_udfs()
def test_should_upload_video_to_location(self): query = """UPLOAD PATH 'dummy.avi' BLOB "b'AAAA'";""" execute_query_fetch_all(query) expected_blob = "b'AAAA'" with open(os.path.join(PATH_PREFIX, 'dummy.avi'), 'rb') as f: bytes_read = f.read() actual_blob = str(base64.b64encode(bytes_read)) self.assertEqual(actual_blob, expected_blob)
def setUpClass(cls): CatalogManager().reset() create_sample_video(NUM_FRAMES) load_query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""" execute_query_fetch_all(load_query) load_inbuilt_udfs() cls.table1 = create_table("table1", 100, 3) cls.table2 = create_table("table2", 500, 3) cls.table3 = create_table("table3", 1000, 3)
def test_should_select_star_in_table(self): select_query = "SELECT * FROM MyVideo;" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_batch = list(create_dummy_batches())[0] self.assertEqual(actual_batch, expected_batch) select_query = "SELECT * FROM MyVideo WHERE id = 5;" actual_batch = execute_query_fetch_all(select_query) expected_batch = list(create_dummy_batches(filters=[5]))[0] self.assertEqual(actual_batch, expected_batch)
def setUpClass(cls): # reset the catalog manager before running each test CatalogManager().reset() create_sample_video() copy_sample_video_to_prefix() load_query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""" execute_query_fetch_all(load_query) query = """LOAD DATA INFILE 'ua_detrac.mp4' INTO UATRAC;""" execute_query_fetch_all(query) load_inbuilt_udfs()
def test_should_load_video_in_table(self): query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo WITH FORMAT VIDEO;""" execute_query_fetch_all(query) select_query = """SELECT id, data FROM MyVideo;""" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_batch = list(create_dummy_batches())[0] expected_batch.modify_column_alias('myvideo') self.assertEqual(actual_batch, expected_batch)
def test_should_load_and_select_using_udf_video(self): # Equality test select_query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \ WHERE DummyObjectDetector(data).label = ['person'] ORDER BY id;" actual_batch = execute_query_fetch_all(select_query) expected = [{ 'myvideo.id': i * 2, 'dummyobjectdetector.label': np.array(['person']) } for i in range(NUM_FRAMES // 2)] expected_batch = Batch(frames=pd.DataFrame(expected)) self.assertEqual(actual_batch, expected_batch) # Contain test select_query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \ WHERE DummyObjectDetector(data).label @> ['person'] ORDER BY id;" actual_batch = execute_query_fetch_all(select_query) self.assertEqual(actual_batch, expected_batch) # Multi element contain test select_query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \ WHERE DummyObjectDetector(data).label <@ ['person', 'bicycle'] \ ORDER BY id;" actual_batch = execute_query_fetch_all(select_query) expected = [{ 'myvideo.id': i * 2, 'dummyobjectdetector.label': np.array(['person']) } for i in range(NUM_FRAMES // 2)] expected += [{ 'myvideo.id': i, 'dummyobjectdetector.label': np.array(['bicycle']) } for i in range(NUM_FRAMES) if i % 2 + 1 == 2] expected_batch = Batch(frames=pd.DataFrame(expected)) expected_batch.sort() self.assertEqual(actual_batch, expected_batch) nested_select_query = """SELECT id, data FROM (SELECT id, data, DummyObjectDetector(data) FROM MyVideo WHERE id >= 2 ) AS T WHERE ['person'] <@ label; """ actual_batch = execute_query_fetch_all(nested_select_query) actual_batch.sort() expected_batch = list( create_dummy_batches( filters=[i for i in range(2, NUM_FRAMES) if i % 2 == 0]))[0] expected_batch.modify_column_alias('T') self.assertEqual(actual_batch, expected_batch)
def test_sort_on_nonprojected_column(self): """This tests doing an order by on a column that is not projected. The orderby_executor currently catches the KeyError, passes, and returns the untouched data """ select_query = "SELECT data FROM MyVideo ORDER BY id;" actual_batch = execute_query_fetch_all(select_query) select_query = "SELECT data FROM MyVideo" expected_batch = execute_query_fetch_all(select_query) self.assertEqual(actual_batch.batch_size, expected_batch.batch_size)
def setUp(self): CatalogManager().reset() create_sample_video(NUM_FRAMES) load_query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""" execute_query_fetch_all(load_query) create_udf_query = """CREATE UDF DummyObjectDetector INPUT (Frame_Array NDARRAY UINT8(3, 256, 256)) OUTPUT (label NDARRAY STR(10)) TYPE Classification IMPL 'test/util.py'; """ execute_query_fetch_all(create_udf_query)
def test_should_load_and_select_in_table(self): select_query = "SELECT id FROM MyVideo;" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_rows = [{"myvideo.id": i} for i in range(NUM_FRAMES)] expected_batch = Batch(frames=pd.DataFrame(expected_rows)) self.assertEqual(actual_batch, expected_batch) select_query = "SELECT id,data FROM MyVideo;" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_batch = list(create_dummy_batches()) self.assertEqual([actual_batch], expected_batch)
def test_should_select_star_in_nested_query(self): select_query = """SELECT * FROM (SELECT * FROM MyVideo) AS T;""" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_batch = list(create_dummy_batches())[0] expected_batch.modify_column_alias("T") self.assertEqual(actual_batch, expected_batch) select_query = """SELECT * FROM (SELECT id FROM MyVideo) AS T;""" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_rows = [{"T.id": i} for i in range(NUM_FRAMES)] expected_batch = Batch(frames=pd.DataFrame(expected_rows)) self.assertEqual(actual_batch, expected_batch)
def test_should_load_video_in_table(self): query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""" execute_query_fetch_all(query) insert_query = """ INSERT INTO MyVideo (id, data) VALUES (40, [[[40, 40, 40] , [40, 40, 40]], [[40, 40, 40], [40, 40, 40]]]);""" execute_query_fetch_all(insert_query) insert_query_2 = """ INSERT INTO MyVideo (id, data) VALUES (41, [[[41, 41, 41] , [41, 41, 41]], [[41, 41, 41], [41, 41, 41]]]);""" execute_query_fetch_all(insert_query_2) query = 'SELECT id, data FROM MyVideo WHERE id = 40' batch = execute_query_fetch_all(query) self.assertIsNone(np.testing.assert_array_equal( batch.frames['data'][0], np.array([[[40, 40, 40], [40, 40, 40]], [[40, 40, 40], [40, 40, 40]]]))) query = 'SELECT id, data FROM MyVideo WHERE id = 41;' batch = execute_query_fetch_all(query) self.assertIsNone(np.testing.assert_array_equal( batch.frames['data'][0], np.array([[[41, 41, 41], [41, 41, 41]], [[41, 41, 41], [41, 41, 41]]])))
def test_should_load_and_select_real_video_in_table(self): query = """LOAD DATA INFILE 'data/ua_detrac/ua_detrac.mp4' INTO UADETRAC;""" execute_query_fetch_all(query) select_query = "SELECT * FROM UADETRAC;" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() video_reader = OpenCVReader("data/ua_detrac/ua_detrac.mp4", batch_mem_size=30000000) expected_batch = Batch(frames=pd.DataFrame()) for batch in video_reader.read(): expected_batch += batch expected_batch.modify_column_alias("uadetrac") self.assertEqual(actual_batch, expected_batch)
def test_should_load_and_sort_in_table(self): select_query = "SELECT data, id FROM MyVideo ORDER BY id;" actual_batch = execute_query_fetch_all(select_query) expected_rows = [{ "myvideo.id": i, "myvideo.data": np.array(np.ones((2, 2, 3)) * float(i + 1) * 25, dtype=np.uint8), } for i in range(NUM_FRAMES)] expected_batch = Batch(frames=pd.DataFrame(expected_rows)) self.assertEqual(actual_batch, expected_batch) select_query = "SELECT data, id FROM MyVideo ORDER BY id DESC;" actual_batch = execute_query_fetch_all(select_query) expected_batch.reverse() self.assertEqual(actual_batch, expected_batch)
def test_hash_join_with_multiple_tables(self): select_query = """SELECT * FROM table1 JOIN table2 ON table1.a0 = table2.a0 JOIN table3 ON table3.a1 = table1.a1 WHERE table1.a2 > 50;""" actual_batch = execute_query_fetch_all(select_query) tmp = pd.merge( self.table1, self.table2, left_on=["table1.a0"], right_on=["table2.a0"], how="inner", ) expected = pd.merge( tmp, self.table3, left_on=["table1.a1"], right_on=["table3.a1"], how="inner", ) expected = expected.where(expected["table1.a2"] > 50) if len(expected): expected_batch = Batch(expected) self.assertEqual( expected_batch.sort_orderby(["table1.a0"]), actual_batch.sort_orderby(["table1.a0"]), )
def test_array_count(self): select_query = """SELECT id FROM MyVideo WHERE Array_Count(DummyMultiObjectDetector(data).labels, 'person') = 2 ORDER BY id;""" actual_batch = execute_query_fetch_all(select_query) expected = [{'myvideo.id': i} for i in range(0, NUM_FRAMES, 3)] expected_batch = Batch(frames=pd.DataFrame(expected)) self.assertEqual(actual_batch, expected_batch) select_query = """SELECT id FROM MyVideo WHERE Array_Count(DummyObjectDetector(data).label, 'bicycle') = 1 ORDER BY id;""" actual_batch = execute_query_fetch_all(select_query) expected = [{'myvideo.id': i} for i in range(1, NUM_FRAMES, 2)] expected_batch = Batch(frames=pd.DataFrame(expected)) self.assertEqual(actual_batch, expected_batch)
def test_show_udfs(self): result = execute_query_fetch_all("SHOW UDFS;") self.assertEqual(len(result.frames.columns), 5) expected = { "name": ["FastRCNNObjectDetector", "Array_Count"], "inputs": [ ["Frame_Array NDARRAY UINT8 [3, None, None]"], ["Input NDARRAY ANYTYPE []", "Key ANY"], ], "outputs": [ [ "labels NDARRAY STR [None]", "bboxes NDARRAY FLOAT32 [None, 4]", "scores NDARRAY FLOAT32 [None]", ], ["count INTEGER"], ], "type": ["Classification", "Ndarray"], } expected_df = pd.DataFrame(expected) self.assertTrue(all(expected_df.inputs == result.frames.inputs)) self.assertTrue(all(expected_df.outputs == result.frames.outputs)) self.assertTrue(all(expected_df.name == result.frames.name)) self.assertTrue(all(expected_df.type == result.frames.type))
def init_builtin_udfs(mode='debug'): """ Loads the builtin udfs into the system. This should be called when the system bootstraps. In debug mode, it also loads udfs used in the test suite. Arguments: mode (str): 'debug' or 'release' """ queries = [Fastrcnn_udf_query, Unnest_udf_query, ArrayCount_udf_query] if mode == 'debug': queries.extend([ DummyObjectDetector_udf_query, DummyMultiObjectDetector_udf_query ]) for query in queries: execute_query_fetch_all(query)
def create_table(table_name, num_rows, num_columns): # creates a table with num_rows tuples and columns = [a1, a2, a3, ...] columns = ''.join('a{} INTEGER, '.format(i) for i in range(num_columns - 1)) columns += 'a{} INTEGER'.format(num_columns - 1) create_table_query = 'CREATE TABLE IF NOT EXISTS {} ( {} );'.format( table_name, columns) execute_query_fetch_all(create_table_query) columns = ['a{}'.format(i) for i in range(num_columns)] df = create_csv(num_rows, columns) # load the CSV load_query = """LOAD DATA INFILE 'dummy.csv' INTO {} WITH FORMAT CSV;""".format(table_name) execute_query_fetch_all(load_query) df.columns = [f'{table_name}.{col}' for col in df.columns] return df
def test_should_mat_view_with_fastrcnn(self): select_query = """SELECT id, FastRCNNObjectDetector(data).labels FROM UATRAC WHERE id < 5;""" query = '''CREATE MATERIALIZED VIEW IF NOT EXISTS uadtrac_fastRCNN (id, labels) \ AS {}'''.format(select_query) execute_query_fetch_all(query) select_view_query = 'SELECT id, labels FROM uadtrac_fastRCNN' actual_batch = execute_query_fetch_all(select_view_query) actual_batch.sort() self.assertEqual(actual_batch.batch_size, 5) # non-trivial test case res = actual_batch.frames for idx in res.index: self.assertTrue('car' in res['uadtrac_fastrcnn.labels'][idx])
def test_should_mat_view_with_dummy(self): materialized_query = """CREATE MATERIALIZED VIEW dummy_view (id, label) AS SELECT id, DummyObjectDetector(data).label FROM MyVideo; """ execute_query_fetch_all(materialized_query) select_query = 'SELECT id, label FROM dummy_view;' actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() labels = DummyObjectDetector().labels expected = [{ 'dummy_view.id': i, 'dummy_view.label': labels[1 + i % 2] } for i in range(NUM_FRAMES)] expected_batch = Batch(frames=pd.DataFrame(expected)) self.assertEqual(actual_batch, expected_batch)
def test_lateral_join_with_multiple_projects(self): select_query = """SELECT id, labels FROM MyVideo JOIN LATERAL FastRCNNObjectDetector(data) WHERE id < 5;""" actual_batch = execute_query_fetch_all(select_query) self.assertTrue( all(actual_batch.frames.columns == ["myvideo.id", "fastrcnnobjectdetector.labels"])) self.assertEqual(actual_batch.batch_size, 5)
def test_select_and_sample(self): select_query = "SELECT id,data FROM MyVideo SAMPLE 7 ORDER BY id;" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_batch = list( create_dummy_batches(filters=range(0, NUM_FRAMES, 7))) self.assertEqual(actual_batch.batch_size, expected_batch[0].batch_size)
def test_should_drop_table(self): catalog_manager = CatalogManager() query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""" execute_query_fetch_all(query) metadata_obj = catalog_manager.get_dataset_metadata(None, "MyVideo") video_dir = metadata_obj.file_url self.assertFalse(metadata_obj is None) column_objects = catalog_manager.get_all_column_objects(metadata_obj) self.assertEqual(len(column_objects), 2) self.assertTrue(Path(video_dir).exists()) drop_query = """DROP TABLE MyVideo;""" execute_query_fetch_all(drop_query) self.assertTrue( catalog_manager.get_dataset_metadata(None, "MyVideo") is None) column_objects = catalog_manager.get_all_column_objects(metadata_obj) self.assertEqual(len(column_objects), 0) self.assertFalse(Path(video_dir).exists())
def test_select_and_limit(self): select_query = "SELECT id,data FROM MyVideo ORDER BY id LIMIT 5;" actual_batch = execute_query_fetch_all(select_query) actual_batch.sort() expected_batch = list(create_dummy_batches(num_frames=10, batch_size=5)) self.assertEqual(actual_batch.batch_size, expected_batch[0].batch_size) self.assertEqual(actual_batch, expected_batch[0])
def test_should_run_pytorch_and_ssd(self): create_udf_query = """CREATE UDF SSDObjectDetector INPUT (Frame_Array NDARRAY UINT8(3, 256, 256)) OUTPUT (label NDARRAY STR(10)) TYPE Classification IMPL 'eva/udfs/ssd_object_detector.py'; """ execute_query_fetch_all(create_udf_query) select_query = """SELECT SSDObjectDetector(data) FROM MyVideo WHERE id < 5;""" actual_batch = execute_query_fetch_all(select_query) self.assertEqual(actual_batch.batch_size, 5) # non-trivial test case res = actual_batch.frames for idx in res.index: self.assertTrue('car' in res['ssdobjectdetector.label'][idx])
def test_nested_select_video_in_table(self): nested_select_query = """SELECT id, data FROM (SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5) AS T WHERE id >= 3;""" actual_batch = execute_query_fetch_all(nested_select_query) actual_batch.sort() expected_batch = list(create_dummy_batches(filters=range(3, 5)))[0] expected_batch.modify_column_alias("T") self.assertEqual(actual_batch, expected_batch) nested_select_query = """SELECT T.id, T.data FROM (SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5) AS T WHERE id >= 3;""" actual_batch = execute_query_fetch_all(nested_select_query) actual_batch.sort() expected_batch = list(create_dummy_batches(filters=range(3, 5)))[0] expected_batch.modify_column_alias("T") self.assertEqual(actual_batch, expected_batch)