def test_select_and_where_video_in_table(self):
    """Check WHERE-filtered SELECTs on MyVideo against the dummy batches."""
    # Equality predicate with both id and data projected.
    query = "SELECT id,data FROM MyVideo WHERE id = 5;"
    actual = execute_query_fetch_all(query)
    batches = list(create_dummy_batches(filters=[5]))
    self.assertEqual(actual, batches[0])

    # Equality predicate with only the data column projected.
    query = "SELECT data FROM MyVideo WHERE id = 5;"
    actual = execute_query_fetch_all(query)
    frame = np.array(np.ones((2, 2, 3)) * float(5 + 1) * 25, dtype=np.uint8)
    expected = Batch(frames=pd.DataFrame([{"data": frame}]))
    self.assertEqual(actual, expected)

    # Open-ended range; the result is sorted before the comparison.
    query = "SELECT id, data FROM MyVideo WHERE id >= 2;"
    actual = execute_query_fetch_all(query)
    actual.sort()
    batches = list(create_dummy_batches(filters=range(2, NUM_FRAMES)))
    self.assertEqual(actual, batches[0])

    # Bounded range built from two predicates joined with AND.
    query = "SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5;"
    actual = execute_query_fetch_all(query)
    actual.sort()
    batches = list(create_dummy_batches(filters=range(2, 5)))
    self.assertEqual(actual, batches[0])
def test_should_select_star_in_table(self):
    """Check ``SELECT *`` on MyVideo, unfiltered and with a WHERE clause."""
    # Unfiltered: sort the result, then compare with the first dummy batch.
    query = "SELECT * FROM MyVideo;"
    actual = execute_query_fetch_all(query)
    actual.sort()
    batches = list(create_dummy_batches())
    self.assertEqual(actual, batches[0])

    # Filtered down to a single id.
    query = "SELECT * FROM MyVideo WHERE id = 5;"
    actual = execute_query_fetch_all(query)
    batches = list(create_dummy_batches(filters=[5]))
    self.assertEqual(actual, batches[0])
def test_should_return_even_frames(self):
    """Write the dummy batches to Petastorm and read back only even ids.

    The read is given the ``id`` column and a predicate that keeps even
    values; the result is compared with dummy batches built for the even
    frame ids.
    """
    dummy_batches = list(create_dummy_batches())

    petastorm = PetastormStorageEngine()
    petastorm.create(self.table)
    for batch in dummy_batches:
        petastorm.write(self.table, batch)

    read_batch = list(
        petastorm.read(self.table, ["id"], lambda id: id % 2 == 0))
    expected_batch = list(
        create_dummy_batches(
            filters=[i for i in range(NUM_FRAMES) if i % 2 == 0]))
    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(read_batch, expected_batch)
def test_should_load_and_select_in_table(self):
    """Load dummy.avi into MyVideo and check id, data and id+data SELECTs."""
    perform_query("""LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""")

    # Only the id column.
    actual = perform_query("SELECT id FROM MyVideo;")
    id_rows = [{"id": i} for i in range(NUM_FRAMES)]
    expected = Batch(frames=pd.DataFrame(id_rows))
    self.assertEqual(actual, expected)

    # Only the data column.
    actual = perform_query("SELECT data FROM MyVideo;")
    data_rows = []
    for i in range(NUM_FRAMES):
        frame = np.array(
            np.ones((2, 2, 3)) * 0.1 * float(i + 1) * 255, dtype=np.uint8)
        data_rows.append({"data": frame})
    expected = Batch(frames=pd.DataFrame(data_rows))
    self.assertEqual(actual, expected)

    # select * is not supported, so both columns are named explicitly.
    actual_batches = [perform_query("SELECT id,data FROM MyVideo;")]
    expected_batches = list(create_dummy_batches())
    self.assertEqual(actual_batches, expected_batches)
def test_should_work_if_batch_size_not_in_config(self, get_val_mock):
    """Read dummy.avi while the mocked config lookup returns ``None``.

    Args:
        get_val_mock: mock injected by the test's patch decorator; it is
            expected to be called exactly once with
            ``("executor", "batch_size")``.
    """
    video_loader = OpenCVReader('dummy.avi')
    get_val_mock.return_value = None

    batches = list(video_loader.read())
    expected = list(create_dummy_batches())

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(batches, expected)
    get_val_mock.assert_called_once_with("executor", "batch_size")
def test_should_skip_first_two_frames_with_offset_two(self):
    """Read dummy.avi with ``offset=2`` and expect frames 2..NUM_FRAMES-1."""
    video_loader = OpenCVReader(file_url='dummy.avi', offset=2)

    batches = list(video_loader.read())
    expected = list(
        create_dummy_batches(filters=list(range(2, NUM_FRAMES))))

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(batches, expected)
def test_should_return_one_batch(self):
    """Read dummy.avi with ``batch_mem_size = NUM_FRAMES * FRAME_SIZE``."""
    video_loader = OpenCVReader(
        file_url=os.path.join(PATH_PREFIX, 'dummy.avi'),
        batch_mem_size=NUM_FRAMES * FRAME_SIZE)

    batches = list(video_loader.read())
    expected = list(create_dummy_batches())

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(batches, expected)
def test_should_return_batches_equivalent_to_number_of_frames(self):
    """Read dummy.avi with ``batch_mem_size = FRAME_SIZE``.

    The result is compared with dummy batches of size one.
    """
    video_loader = OpenCVReader(
        file_url=os.path.join(PATH_PREFIX, 'dummy.avi'),
        batch_mem_size=FRAME_SIZE)

    batches = list(video_loader.read())
    expected = list(create_dummy_batches(batch_size=1))

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(batches, expected)
def test_nested_select_video_in_table(self):
    """Check a SELECT over a filtered sub-SELECT; ids 3 and 4 are expected."""
    query = """SELECT id, data FROM
        (SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5)
        WHERE id >= 3;"""

    actual = execute_query_fetch_all(query)
    actual.sort()

    batches = list(create_dummy_batches(filters=range(3, 5)))
    self.assertEqual(actual, batches[0])
def test_select_and_sample(self):
    """Check ``SAMPLE 7``: only the batch sizes are compared."""
    query = "SELECT id,data FROM MyVideo SAMPLE 7 ORDER BY id;"
    actual = execute_query_fetch_all(query)
    actual.sort()

    # Every 7th frame id, starting at 0.
    sampled_ids = range(0, NUM_FRAMES, 7)
    expected_batches = list(create_dummy_batches(filters=sampled_ids))

    self.assertEqual(actual.batch_size, expected_batches[0].batch_size)
def test_should_load_video_in_table(self):
    """Load dummy.avi into MyVideo and read it back via the storage engine."""
    perform_query("""LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""")

    table_metadata = CatalogManager().get_dataset_metadata("", "MyVideo")
    stored_batches = list(StorageEngine.read(table_metadata))
    dummy_batches = list(create_dummy_batches())

    self.assertEqual(stored_batches, dummy_batches)
def test_should_start_frame_number_from_two(self):
    """Read dummy.avi with ``start_frame_id=2``.

    The expected batches cover every frame and are built with
    ``start_id=2``.
    """
    video_loader = OpenCVReader(file_url='dummy.avi',
                                batch_size=NUM_FRAMES,
                                start_frame_id=2)

    batches = list(video_loader.read())
    expected = list(
        create_dummy_batches(filters=list(range(NUM_FRAMES)), start_id=2))

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(batches, expected)
def test_select_and_limit(self):
    """Check ``LIMIT 5`` against the first dummy batch of size five."""
    query = "SELECT id,data FROM MyVideo LIMIT 5;"
    actual = execute_query_fetch_all(query)
    actual.sort()

    expected_batches = list(
        create_dummy_batches(num_frames=10, batch_size=5))
    first_expected = expected_batches[0]

    # Compare the size first, then the full contents.
    self.assertEqual(actual.batch_size, first_expected.batch_size)
    self.assertEqual(actual, first_expected)
def test_should_load_video_in_table(self):
    """Load dummy.avi into MyVideo and read it back via the storage engine.

    The batches read from storage are concatenated into a single batch
    and sorted before being compared with the dummy batches.
    """
    query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
    execute_query_fetch_all(query)

    metadata = CatalogManager().get_dataset_metadata("", "MyVideo")
    # The former empty-Batch initialisation was overwritten immediately
    # by this assignment, so it has been dropped.
    actual_batch = Batch.concat(StorageEngine.read(metadata), copy=False)
    actual_batch.sort()

    expected_batch = list(create_dummy_batches())
    self.assertEqual([actual_batch], expected_batch)
def test_should_start_frame_number_from_two(self):
    """Read dummy.avi with ``start_frame_id=2`` and a full-video batch size.

    The expected batches cover every frame and are built with
    ``start_id=2``.
    """
    video_loader = OpenCVReader(
        file_url=os.path.join(PATH_PREFIX, 'dummy.avi'),
        batch_mem_size=FRAME_SIZE * NUM_FRAMES,
        start_frame_id=2)

    batches = list(video_loader.read())
    expected = list(
        create_dummy_batches(filters=list(range(NUM_FRAMES)), start_id=2))

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(batches, expected)
def test_should_write_rows_to_table(self):
    """Write the dummy batches to Petastorm and read the table back."""
    dummy_batches = list(create_dummy_batches())

    petastorm = PetastormStorageEngine()
    petastorm.create(self.table)
    for batch in dummy_batches:
        petastorm.write(self.table, batch)

    read_batch = list(petastorm.read(self.table))
    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(read_batch, dummy_batches)
def test_should_skip_first_two_frames_and_batch_size_equal_to_no_of_frames(
        self):
    """Read dummy.avi with ``offset=2`` and a full-video batch size.

    Frames 2..NUM_FRAMES-1 are expected.
    """
    video_loader = OpenCVReader(
        file_url=os.path.join(PATH_PREFIX, 'dummy.avi'),
        batch_mem_size=FRAME_SIZE * NUM_FRAMES,
        offset=2)

    batches = list(video_loader.read())
    expected = list(
        create_dummy_batches(filters=list(range(2, NUM_FRAMES))))

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(batches, expected)
def test_should_write_rows_to_table(self):
    """Write alias-free dummy batches to Petastorm and read the table back."""
    dummy_batches = list(create_dummy_batches())

    petastorm = PetastormStorageEngine()
    petastorm.create(self.table)
    for batch in dummy_batches:
        # The column alias is dropped on each batch before it is written.
        batch.drop_column_alias()
        petastorm.write(self.table, batch)

    read_batch = list(petastorm.read(self.table, batch_mem_size=3000))
    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(read_batch, dummy_batches)
def test_select_and_union_video_in_table(self):
    """Check UNION ALL of two and of three filtered SELECTs on MyVideo."""
    # Two branches: ids below 3 and ids above 7.
    query = """SELECT id, data FROM MyVideo WHERE id < 3
        UNION ALL SELECT id, data FROM MyVideo WHERE id > 7;"""
    actual = execute_query_fetch_all(query)
    actual.sort()
    wanted_ids = [i for i in range(NUM_FRAMES) if i < 3 or i > 7]
    batches = list(create_dummy_batches(filters=wanted_ids))
    self.assertEqual(actual, batches[0])

    # Three branches: ids below 2, the single id 5, and ids above 7.
    query = """SELECT id, data FROM MyVideo WHERE id < 2
        UNION ALL
        SELECT id, data FROM MyVideo WHERE id > 4 AND id < 6
        UNION ALL
        SELECT id, data FROM MyVideo WHERE id > 7;"""
    actual = execute_query_fetch_all(query)
    actual.sort()
    wanted_ids = [
        i for i in range(NUM_FRAMES) if i < 2 or i == 5 or i > 7
    ]
    batches = list(create_dummy_batches(filters=wanted_ids))
    self.assertEqual(actual, batches[0])
def test_should_load_video_in_table(self):
    """Load dummy.avi ``WITH FORMAT VIDEO`` and select id and data back."""
    load_query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo
        WITH FORMAT VIDEO;"""
    execute_query_fetch_all(load_query)

    actual = execute_query_fetch_all("""SELECT id, data FROM MyVideo;""")
    actual.sort()

    # The expected batch gets the 'myvideo' column alias before comparing.
    batches = list(create_dummy_batches())
    expected = batches[0]
    expected.modify_column_alias('myvideo')

    self.assertEqual(actual, expected)
def test_should_return_even_frames(self):
    """Write alias-free dummy batches to Petastorm and read back even ids.

    The read is given the ``id`` column and a predicate that keeps even
    values; the result is compared with dummy batches built for the even
    frame ids.
    """
    dummy_batches = list(create_dummy_batches())

    petastorm = PetastormStorageEngine()
    petastorm.create(self.table)
    for batch in dummy_batches:
        batch.drop_column_alias()
        petastorm.write(self.table, batch)

    read_batch = list(
        petastorm.read(
            self.table,
            batch_mem_size=3000,
            columns=["id"],
            predicate_func=lambda id: id % 2 == 0))

    expected_batch = list(create_dummy_batches(
        filters=[i for i in range(NUM_FRAMES) if i % 2 == 0]))
    # The written batches had their column alias dropped; do the same to
    # the expected ones so the comparison is like-for-like.
    # NOTE(review): assumes drop_column_alias() mutates in place, as its
    # statement-only use on the written batches suggests -- confirm.
    for batch in expected_batch:
        batch.drop_column_alias()

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(read_batch, expected_batch)
def test_should_load_video_in_table(self):
    """Load dummy.avi into MyVideo and read it back via the storage engine.

    The batches read from storage are accumulated into one batch with
    ``+=`` and sorted before being compared with the dummy batches.
    """
    perform_query("""LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""")

    table_metadata = CatalogManager().get_dataset_metadata("", "MyVideo")

    # Start from an empty batch and append every stored batch to it.
    merged = Batch(pd.DataFrame())
    for stored in StorageEngine.read(table_metadata):
        merged += stored
    merged.sort()

    dummy_batches = list(create_dummy_batches())
    self.assertEqual([merged], dummy_batches)
def test_should_load_and_select_using_udf_video(self):
    """Check DummyObjectDetector label predicates (=, @>, <@) and nesting."""

    def person_rows():
        # One 'person' row per even id: 0, 2, 4, ...
        return [{
            'myvideo.id': i * 2,
            'dummyobjectdetector.label': np.array(['person'])
        } for i in range(NUM_FRAMES // 2)]

    # Equality test
    query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \
        WHERE DummyObjectDetector(data).label = ['person'] ORDER BY id;"
    actual = execute_query_fetch_all(query)
    person_batch = Batch(frames=pd.DataFrame(person_rows()))
    self.assertEqual(actual, person_batch)

    # Contain test
    query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \
        WHERE DummyObjectDetector(data).label @> ['person'] ORDER BY id;"
    actual = execute_query_fetch_all(query)
    self.assertEqual(actual, person_batch)

    # Multi element contain test
    query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \
        WHERE DummyObjectDetector(data).label <@ ['person', 'bicycle'] \
        ORDER BY id;"
    actual = execute_query_fetch_all(query)
    bicycle_rows = [{
        'myvideo.id': i,
        'dummyobjectdetector.label': np.array(['bicycle'])
    } for i in range(NUM_FRAMES) if i % 2 == 1]
    expected = Batch(frames=pd.DataFrame(person_rows() + bicycle_rows))
    expected.sort()
    self.assertEqual(actual, expected)

    # Nested query: the outer WHERE filters on the label column.
    nested_query = """SELECT id, data FROM
        (SELECT id, data, DummyObjectDetector(data) FROM MyVideo
            WHERE id >= 2
        ) AS T
        WHERE ['person'] <@ label;
        """
    actual = execute_query_fetch_all(nested_query)
    actual.sort()
    even_ids = [i for i in range(2, NUM_FRAMES) if i % 2 == 0]
    batches = list(create_dummy_batches(filters=even_ids))
    expected = batches[0]
    expected.modify_column_alias('T')
    self.assertEqual(actual, expected)
def test_should_load_and_select_in_table(self):
    """Check SELECTs of id alone and of id plus data on MyVideo."""
    # Only the id column; the result is sorted before comparing.
    actual = execute_query_fetch_all("SELECT id FROM MyVideo;")
    actual.sort()
    id_rows = [{"id": i} for i in range(NUM_FRAMES)]
    self.assertEqual(actual, Batch(frames=pd.DataFrame(id_rows)))

    # Both columns, compared with the full list of dummy batches.
    actual = execute_query_fetch_all("SELECT id,data FROM MyVideo;")
    actual.sort()
    dummy_batches = list(create_dummy_batches())
    self.assertEqual([actual], dummy_batches)
def test_should_select_star_in_nested_query(self):
    """Check ``SELECT *`` over an aliased sub-SELECT on MyVideo."""
    # Inner SELECT * : expect the dummy batch re-aliased to T.
    query = """SELECT * FROM (SELECT * FROM MyVideo) AS T;"""
    actual = execute_query_fetch_all(query)
    actual.sort()
    batches = list(create_dummy_batches())
    expected = batches[0]
    expected.modify_column_alias("T")
    self.assertEqual(actual, expected)

    # Inner SELECT id : expect only the T.id column.
    query = """SELECT * FROM (SELECT id FROM MyVideo) AS T;"""
    actual = execute_query_fetch_all(query)
    actual.sort()
    id_rows = [{"T.id": i} for i in range(NUM_FRAMES)]
    expected = Batch(frames=pd.DataFrame(id_rows))
    self.assertEqual(actual, expected)
def test_select_and_where_video_in_table(self):
    """Load dummy.avi, then check WHERE-filtered and nested SELECTs."""
    perform_query("""LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""")

    # Equality predicate with both id and data projected.
    actual = perform_query("SELECT id,data FROM MyVideo WHERE id = 5;")
    batches = list(create_dummy_batches(filters=[5]))
    self.assertEqual(actual, batches[0])

    # Equality predicate with only the data column projected.
    actual = perform_query("SELECT data FROM MyVideo WHERE id = 5;")
    frame = np.array(
        np.ones((2, 2, 3)) * 0.1 * float(5 + 1) * 255, dtype=np.uint8)
    expected = Batch(frames=pd.DataFrame([{"data": frame}]))
    self.assertEqual(actual, expected)

    # Open-ended range.
    actual = perform_query("SELECT id, data FROM MyVideo WHERE id >= 2;")
    batches = list(create_dummy_batches(filters=range(2, NUM_FRAMES)))
    self.assertEqual(actual, batches[0])

    # Bounded range built from two predicates joined with AND.
    actual = perform_query(
        "SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5;")
    batches = list(create_dummy_batches(filters=range(2, 5)))
    self.assertEqual(actual, batches[0])

    # Outer filter applied on top of a filtered sub-SELECT.
    nested_query = """SELECT id, data FROM \
        (SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5)
        WHERE id >= 3;"""
    actual = perform_query(nested_query)
    batches = list(create_dummy_batches(filters=range(3, 5)))
    self.assertEqual(actual, batches[0])
def test_should_load_and_select_in_table(self):
    """Check SELECTs of id alone and of id plus data on MyVideo."""
    # Only the id column; the result is sorted before comparing.
    actual = perform_query("SELECT id FROM MyVideo;")
    actual.sort()
    id_rows = [{"id": i} for i in range(NUM_FRAMES)]
    self.assertEqual(actual, Batch(frames=pd.DataFrame(id_rows)))

    # TODO: the "SELECT data FROM MyVideo;" check is disabled because it
    # needs ORDER BY. It compared the result with one row per frame i in
    # range(NUM_FRAMES), each holding
    # np.array(np.ones((2, 2, 3)) * 0.1 * float(i + 1) * 255,
    #          dtype=np.uint8) under the "data" key.

    # Both columns, compared with the full list of dummy batches.
    actual = perform_query("SELECT id,data FROM MyVideo;")
    actual.sort()
    dummy_batches = list(create_dummy_batches())
    self.assertEqual([actual], dummy_batches)
def test_should_load_and_select_using_udf_video(self):
    """Load dummy.avi, register DummyObjectDetector and filter on its label."""
    perform_query("""LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""")

    udf_query = """CREATE UDF DummyObjectDetector
        INPUT  (Frame_Array NDARRAY (3, 256, 256))
        OUTPUT (label TEXT(10))
        TYPE  Classification
        IMPL  'test/util.py';
        """
    perform_query(udf_query)

    # Direct predicate on the UDF output: 'person' rows for even ids.
    query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \
        WHERE DummyObjectDetector(data).label = 'person';"
    actual = perform_query(query)
    actual.sort()
    person_rows = [{
        'id': i * 2,
        'label': 'person'
    } for i in range(NUM_FRAMES // 2)]
    self.assertEqual(actual, Batch(frames=pd.DataFrame(person_rows)))

    # Nested query: the outer WHERE filters on the label column.
    nested_query = """SELECT id, data FROM
        (SELECT id, data, DummyObjectDetector(data) FROM MyVideo
            WHERE id >= 2
        ) WHERE label = 'person';
        """
    actual = perform_query(nested_query)
    actual.sort()
    even_ids = [i for i in range(2, NUM_FRAMES) if i % 2 == 0]
    batches = list(create_dummy_batches(filters=even_ids))
    self.assertEqual(actual, batches[0])
def test_should_return_batches_equivalent_to_number_of_frames(self):
    """Read dummy.avi with ``batch_size=1``.

    The result is compared with dummy batches of size one.
    """
    video_loader = OpenCVReader(file_url='dummy.avi', batch_size=1)

    batches = list(video_loader.read())
    expected = list(create_dummy_batches(batch_size=1))

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(batches, expected)
def test_should_return_one_batches_for_negative_size(self):
    """Read dummy.avi with ``batch_size=-1``.

    The result is compared with the default dummy batches.
    """
    video_loader = OpenCVReader(file_url='dummy.avi', batch_size=-1)

    batches = list(video_loader.read())
    expected = list(create_dummy_batches())

    # assertTrue(first, msg) uses its second argument as the failure
    # message only, so the former call never compared the two lists.
    self.assertEqual(batches, expected)