    def test_load_dataset(self):
        """Test loading a dataset from file."""
        store = FileSystemDatastore(STORE_DIR)
        ds = store.load_dataset(f_handle=FILE)
        dataset_dir = os.path.join(STORE_DIR, ds.identifier)
        self.assertTrue(os.path.isdir(dataset_dir))
        self.assertTrue(os.path.isfile(os.path.join(dataset_dir, DATA_FILE)))
        self.assertTrue(os.path.isfile(os.path.join(dataset_dir, DESCRIPTOR_FILE)))
        self.assertFalse(os.path.isfile(os.path.join(dataset_dir, METADATA_FILE)))
        self.validate_class_size_dataset(ds)
        with self.assertRaises(ValueError):
            store.load_dataset(f_handle=None)
    def test_delete_dataset(self):
        """Test deleting datasets."""
        store = FileSystemDatastore(STORE_DIR)
        ds_id = store.load_dataset(f_handle=FILE).identifier
        ds_id_2 = store.load_dataset(f_handle=FILE).identifier
        self.assertIsNotNone(store.get_dataset(ds_id))
        self.assertIsNotNone(store.get_dataset(ds_id_2))
        store.delete_dataset(ds_id)
        self.assertIsNone(store.get_dataset(ds_id))
        self.assertIsNotNone(store.get_dataset(ds_id_2))
        # Reload store to ensure only one dataset still exists
        store = FileSystemDatastore(STORE_DIR)
        self.assertIsNone(store.get_dataset(ds_id))
        self.assertIsNotNone(store.get_dataset(ds_id_2))
        # Delete the second dataset
        store.delete_dataset(ds_id_2)
        store = FileSystemDatastore(STORE_DIR)
        self.assertIsNone(store.get_dataset(ds_id))
        self.assertIsNone(store.get_dataset(ds_id_2))
    def test_get_dataset(self):
        """Test accessing dataset handle and descriptor."""
        # None for non-existing dataset
        store = FileSystemDatastore(STORE_DIR)
        self.assertIsNone(store.get_dataset('0000'))
        ds_id = store.load_dataset(f_handle=FILE).identifier
        self.assertIsNotNone(store.get_dataset(ds_id))
        self.assertIsNone(store.get_dataset('0000'))
        # Reload store to ensure the dataset still exists
        store = FileSystemDatastore(STORE_DIR)
        self.assertIsNotNone(store.get_dataset(ds_id))
        self.assertIsNone(store.get_dataset('0000'))
        self.validate_class_size_dataset(store.get_dataset(ds_id))
        # Load a second dataset
        ds_id_2 = store.load_dataset(f_handle=FILE).identifier
        self.assertIsNotNone(store.get_dataset(ds_id))
        self.assertIsNotNone(store.get_dataset(ds_id_2))
        # Reload store to ensure both datasets still exist
        store = FileSystemDatastore(STORE_DIR)
        self.assertIsNotNone(store.get_dataset(ds_id))
        self.assertIsNotNone(store.get_dataset(ds_id_2))
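
# ---------------------------------------------------------------------------
# The datastore tests above depend on module-level fixtures defined earlier in
# this file. A minimal sketch of what they are assumed to provide (the names
# match the tests; the descriptions are illustrative assumptions, not the
# original definitions):
#
#   STORE_DIR = ...        # scratch directory for the FileSystemDatastore
#   FILE = ...             # file handle for the class-size CSV fixture
#   DATA_FILE = ...        # name of the per-dataset rows file
#   DESCRIPTOR_FILE = ...  # name of the per-dataset descriptor file
#   METADATA_FILE = ...    # name of the per-dataset annotations file
#
# validate_class_size_dataset(ds) is assumed to be a helper method on this
# test class that asserts the column layout and row count of the fixture
# dataset.
# ---------------------------------------------------------------------------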
class TestDefaultPyCellProcessor(unittest.TestCase):

    def setUp(self):
        """Create instances of the default datastore and filestore."""
        # Drop the server directory if it exists
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)
        os.makedirs(SERVER_DIR)
        self.datastore = FileSystemDatastore(DATASTORE_DIR)
        self.filestore = FileSystemFilestore(FILESTORE_DIR)

    def tearDown(self):
        """Clean up by dropping the server directory."""
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)

    def test_create_dataset_script(self):
        """Test running a script that creates a new dataset."""
        cmd = python_cell(source=CREATE_DATASET_PY, validate=True)
        result = PyCellTaskProcessor().compute(
            command_id=cmd.command_id,
            arguments=cmd.arguments,
            context=TaskContext(
                datastore=self.datastore,
                filestore=self.filestore
            )
        )
        self.assertTrue(result.is_success)
        self.assertIsNotNone(result.provenance.read)
        self.assertIsNotNone(result.provenance.write)
        self.assertEqual(len(result.provenance.read), 0)
        self.assertEqual(len(result.provenance.write), 1)
        self.assertTrue('people' in result.provenance.write)
        self.assertIsNotNone(result.provenance.write['people'])
        self.assertEqual(len(result.outputs.stdout), 1)
        self.assertEqual(len(result.outputs.stderr), 0)
        self.assertEqual(result.outputs.stdout[0].value, 'Alice\nBob')

    def test_print_dataset_script(self):
        """Test running a script that prints rows of an existing dataset."""
        fh = self.filestore.upload_file(CSV_FILE)
        ds = self.datastore.load_dataset(fh)
        cmd = python_cell(source=PRINT_DATASET_PY, validate=True)
        result = PyCellTaskProcessor().compute(
            command_id=cmd.command_id,
            arguments=cmd.arguments,
            context=TaskContext(
                datastore=self.datastore,
                filestore=self.filestore,
                datasets={'people': ds.identifier}
            )
        )
        self.assertTrue(result.is_success)
        self.assertIsNotNone(result.provenance.read)
        self.assertIsNotNone(result.provenance.write)
        self.assertEqual(len(result.provenance.read), 1)
        self.assertEqual(len(result.provenance.write), 0)
        self.assertTrue('people' in result.provenance.read)
        self.assertIsNotNone(result.provenance.read['people'])
        self.assertEqual(len(result.outputs.stdout), 1)
        self.assertEqual(len(result.outputs.stderr), 0)
        self.assertEqual(result.outputs.stdout[0].value, 'Alice\nBob')

    def test_simple_script(self):
        """Test running a simple Python script."""
        cmd = python_cell(source='print 2+2', validate=True)
        result = PyCellTaskProcessor().compute(
            command_id=cmd.command_id,
            arguments=cmd.arguments,
            context=TaskContext(
                datastore=self.datastore,
                filestore=self.filestore,
                datasets=dict()
            )
        )
        self.assertTrue(result.is_success)
        self.assertEqual(result.outputs.stdout[0].value, '4')

    def test_unknown_dataset_script(self):
        """Test running a script that accesses an unknown dataset."""
        fh = self.filestore.upload_file(CSV_FILE)
        ds = self.datastore.load_dataset(fh)
        cmd = python_cell(source=PRINT_UNKNOWN_DATASET_PY, validate=True)
        result = PyCellTaskProcessor().compute(
            command_id=cmd.command_id,
            arguments=cmd.arguments,
            context=TaskContext(
                datastore=self.datastore,
                filestore=self.filestore,
                datasets={'people': ds.identifier}
            )
        )
        self.assertFalse(result.is_success)
        self.assertIsNone(result.provenance.read)
        self.assertIsNone(result.provenance.write)
        self.assertEqual(len(result.outputs.stdout), 0)
        self.assertEqual(len(result.outputs.stderr), 1)
        # Running a similar script that catches the error should be a success,
        # and the access to the dataset should be recorded in the resulting
        # read provenance
        cmd = python_cell(
            source=PRINT_UNKNOWN_DATASET_PY_WITH_TRY_CATCH,
            validate=True
        )
        result = PyCellTaskProcessor().compute(
            command_id=cmd.command_id,
            arguments=cmd.arguments,
            context=TaskContext(
                datastore=self.datastore,
                filestore=self.filestore,
                datasets={'people': ds.identifier}
            )
        )
        self.assertTrue(result.is_success)
        self.assertIsNotNone(result.provenance.read)
        self.assertIsNotNone(result.provenance.write)
        self.assertEqual(len(result.provenance.read), 1)
        self.assertEqual(len(result.provenance.write), 0)
        self.assertTrue('employees' in result.provenance.read)
        self.assertIsNone(result.provenance.read['employees'])
        self.assertEqual(len(result.outputs.stdout), 1)
        self.assertEqual(len(result.outputs.stderr), 0)
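
# ---------------------------------------------------------------------------
# The *_PY script constants run by the PyCell tests above are defined near the
# top of this module. A plausible sketch of PRINT_DATASET_PY, assuming cells
# see the client under the name `vizierdb` (an assumption; only the expected
# stdout 'Alice\nBob' and the read provenance for 'people' are confirmed by
# the assertions above):
#
#   PRINT_DATASET_PY = """
#   ds = vizierdb.get_dataset('people')
#   for row in ds.rows:
#       print(row.get_value('Name'))
#   """
#
# CREATE_DATASET_PY would build the same two-row 'people' dataset and register
# it via create_dataset, which is what the write-provenance check verifies.
# ---------------------------------------------------------------------------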
class TestDefaultPlotProcessor(unittest.TestCase):

    def setUp(self):
        """Create instances of the default datastore and filestore."""
        # Drop the server directory if it exists
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)
        os.makedirs(SERVER_DIR)
        self.datastore = FileSystemDatastore(DATASTORE_DIR)
        self.filestore = FileSystemFilestore(FILESTORE_DIR)

    def tearDown(self):
        """Clean up by dropping the server directory."""
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)

    def test_advanced_plot(self):
        """Test running the plot command with a more advanced chart
        definition.
        """
        fh = self.filestore.upload_file(TSV_FILE)
        ds = self.datastore.load_dataset(fh)
        cmd = create_plot(
            dataset_name=DATASET_NAME,
            chart_name='My Chart',
            series=[
                {'column': 1, 'range': '25:30', 'label': 'A'},
                {'column': 0, 'range': '25:30'}
            ],
            validate=True
        )
        result = PlotProcessor().compute(
            command_id=cmd.command_id,
            arguments=cmd.arguments,
            context=TaskContext(
                project_id=0,
                datastore=self.datastore,
                filestore=self.filestore,
                artifacts={DATASET_NAME: ds}
            )
        )
        chart = result.outputs.stdout[0].value
        self.assertEqual(chart['data']['data'][0]['label'], 'A')
        self.assertEqual(chart['data']['data'][1]['label'], 'average_class_size')
        self.assertEqual(chart['result']['series'][0]['label'], 'A')
        self.assertEqual(chart['result']['series'][1]['label'], 'average_class_size')
        self.assertEqual(len(chart['result']['series'][0]['data']), 6)
        self.assertEqual(len(chart['result']['series'][1]['data']), 6)

    def test_simple_plot(self):
        """Test running the simple plot command."""
        fh = self.filestore.upload_file(CSV_FILE)
        ds = self.datastore.load_dataset(fh)
        cmd = create_plot(
            dataset_name=DATASET_NAME,
            chart_name='My Chart',
            series=[{'column': 1}],
            validate=True
        )
        result = PlotProcessor().compute(
            command_id=cmd.command_id,
            arguments=cmd.arguments,
            context=TaskContext(
                project_id=0,
                datastore=self.datastore,
                filestore=self.filestore,
                artifacts={DATASET_NAME: ds}
            )
        )
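
# ---------------------------------------------------------------------------
# Shape of the chart payload asserted in test_advanced_plot above,
# reconstructed from the assertions alone (any fields beyond these are
# unknown here):
#
#   chart = {
#       'data': {
#           'data': [
#               {'label': 'A', ...},
#               {'label': 'average_class_size', ...},
#           ]
#       },
#       'result': {
#           'series': [
#               {'label': 'A', 'data': [...]},                   # 6 values
#               {'label': 'average_class_size', 'data': [...]},  # 6 values
#           ]
#       },
#   }
# ---------------------------------------------------------------------------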
class TestVizierClient(unittest.TestCase):

    def setUp(self):
        """Create instances of the default datastore and filestore."""
        # Drop the server directory if it exists
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)
        os.makedirs(SERVER_DIR)
        self.datastore = FileSystemDatastore(DATASTORE_DIR)
        self.filestore = FileSystemFilestore(FILESTORE_DIR)

    def tearDown(self):
        """Clean up by dropping the server directory."""
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)

    def test_create_new_dataset(self):
        """Test creating and updating a new dataset via the client."""
        client = VizierDBClient(
            datastore=self.datastore,
            datasets=dict(),
            dataobjects=dict(),
            source="",
            project_id=7
        )
        ds = DatasetClient()
        ds.insert_column('Name')
        ds.insert_column('Age')
        ds.insert_row(['Alice', '23'])
        ds.insert_row(['Bob', '25'])
        rows = ds.rows
        ds = client.create_dataset('MyDataset', ds)
        # Ensure the returned dataset contains the input data
        self.assertEqual([c.name for c in ds.columns], ['Name', 'Age'])
        self.assertEqual([str(v) for v in ds.rows[0].values], ['Alice', '23'])
        self.assertEqual([str(v) for v in ds.rows[1].values], ['Bob', '25'])
        # Modify the reference to the original rows to ensure that the rows
        # in the loaded dataset are not affected
        self.assertEqual([str(v) for v in rows[0].values], ['Alice', '23'])
        rows[0].set_value(0, 'Jane')
        self.assertEqual([str(v) for v in rows[0].values], ['Jane', '23'])
        self.assertEqual([str(v) for v in ds.rows[0].values], ['Alice', '23'])
        # Update the dataset
        ds.rows[1].set_value('Age', '26')
        ds.save()
        ds = client.get_dataset('MyDataset')
        self.assertEqual([str(v) for v in ds.rows[1].values], ['Bob', '26'])
        # Value error when creating a dataset with an existing name
        with self.assertRaises(ValueError):
            client.create_dataset('MyDataset', ds)
        # Value error when retrieving an unknown dataset
        with self.assertRaises(ValueError):
            client.get_dataset('SomeDataset')
        # Ensure the renamed dataset contains the modified data
        client.rename_dataset('MyDataset', 'SomeDataset')
        ds = client.get_dataset('SomeDataset')
        # Ensure that access to unknown datasets is recorded
        with self.assertRaises(ValueError):
            client.get_dataset('ThisIsNotADataset')
        self.assertTrue('mydataset' in client.write)
        self.assertTrue('somedataset' in client.write)
        self.assertTrue('thisisnotadataset' in client.read)
        self.assertFalse('thisisnotadataset' in client.write)

    def test_update_existing_dataset(self):
        """Test creating and updating an existing dataset via the client."""
        # Move columns around
        ds = self.datastore.load_dataset(self.filestore.upload_file(CSV_FILE))
        client = VizierDBClient(
            datastore=self.datastore,
            datasets={DATASET_NAME: ds},
            dataobjects=dict(),
            source="",
            project_id=7
        )
        ds = client.get_dataset(DATASET_NAME)
        col_1 = [row.get_value(1) for row in ds.rows]
        ds.insert_column('empty', 3)
        ds = client.update_dataset(DATASET_NAME, ds)
        col_2 = [row.get_value(2) for row in ds.rows]
        ds.move_column('empty', 1)
        ds = client.update_dataset(DATASET_NAME, ds)
        for i in range(len(ds.rows)):
            row = ds.rows[i]
            self.assertEqual(row.values[3], col_2[i])
            self.assertEqual(row.values[2], col_1[i])
        # Rename a column
        ds.columns[1].name = 'allnone'
        ds = client.update_dataset(DATASET_NAME, ds)
        for i in range(len(ds.rows)):
            row = ds.rows[i]
            self.assertEqual(row.get_value('allnone'), None)
            self.assertEqual(row.values[2], col_1[i])
        # Insert a row
        row = ds.insert_row()
        row.set_value('Name', 'Zoe')
        ds = client.create_dataset('upd', ds)
        self.assertEqual(len(ds.rows), 3)
        r2 = ds.rows[2]
        self.assertEqual(r2.values, ['Zoe', None, None, None])
        # Delete a column
        ds = client.get_dataset(DATASET_NAME)
        ds.delete_column('Age')
        client.update_dataset(DATASET_NAME, ds)
        ds = client.get_dataset(DATASET_NAME)
        names = [col.name.upper() for col in ds.columns]
        self.assertTrue('NAME' in names)
        self.assertFalse('AGE' in names)
        self.assertTrue(DATASET_NAME in client.read)
        self.assertTrue(DATASET_NAME in client.write)
        self.assertFalse('upd' in client.read)
        self.assertTrue('upd' in client.write)
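
def example_client_usage(datastore):
    """Illustrative sketch (not a test) of the client API exercised above.

    `datastore` is assumed to be any datastore instance, e.g. a
    FileSystemDatastore; the constructor arguments mirror the tests in this
    class.
    """
    client = VizierDBClient(
        datastore=datastore,
        datasets=dict(),
        dataobjects=dict(),
        source="",
        project_id=7
    )
    ds = DatasetClient()
    ds.insert_column('Name')
    ds.insert_row(['Alice'])
    ds = client.create_dataset('People', ds)
    # Dataset names are tracked case-insensitively in the provenance sets
    assert 'people' in client.write
    return ds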
class TestVizierClient(unittest.TestCase):

    def setUp(self):
        """Create instances of the default datastore and filestore."""
        # Drop the server directory if it exists
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)
        os.makedirs(SERVER_DIR)
        self.datastore = FileSystemDatastore(DATASTORE_DIR)
        self.filestore = FileSystemFilestore(FILESTORE_DIR)

    def tearDown(self):
        """Clean up by dropping the server directory."""
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)

    def test_create_new_dataset(self):
        """Test creating and updating a new dataset via the client."""
        client = VizierDBClient(datastore=self.datastore, datasets=dict())
        ds = DatasetClient()
        ds.insert_column('Name')
        ds.insert_column('Age')
        ds.insert_row(['Alice', '23'])
        ds.insert_row(['Bob', '25'])
        rows = ds.rows
        ds = client.create_dataset('MyDataset', ds)
        # Ensure the returned dataset contains the input data
        self.assertEqual([c.name for c in ds.columns], ['Name', 'Age'])
        self.assertEqual([str(v) for v in ds.rows[0].values], ['Alice', '23'])
        self.assertEqual([str(v) for v in ds.rows[1].values], ['Bob', '25'])
        # Modify the reference to the original rows to ensure that the rows
        # in the loaded dataset are not affected
        self.assertEqual([str(v) for v in rows[0].values], ['Alice', '23'])
        rows[0].set_value(0, 'Jane')
        self.assertEqual([str(v) for v in rows[0].values], ['Jane', '23'])
        self.assertEqual([str(v) for v in ds.rows[0].values], ['Alice', '23'])
        # Update the dataset
        ds.rows[1].set_value('Age', '26')
        client.update_dataset('MyDataset', ds)
        ds = client.get_dataset('MyDataset')
        self.assertEqual([str(v) for v in ds.rows[1].values], ['Bob', '26'])
        # Value error when creating a dataset with an existing name
        with self.assertRaises(ValueError):
            client.create_dataset('MyDataset', ds)
        # Value error when retrieving an unknown dataset
        with self.assertRaises(ValueError):
            client.get_dataset('SomeDataset')
        # Ensure the renamed dataset contains the modified data
        client.rename_dataset('MyDataset', 'SomeDataset')
        ds = client.get_dataset('SomeDataset')
        # Ensure that access to unknown datasets is recorded
        with self.assertRaises(ValueError):
            client.get_dataset('ThisIsNotADataset')
        for name in ['somedataset', 'mydataset']:
            self.assertTrue(name in client.read)
            self.assertTrue(name in client.write)
        self.assertTrue('thisisnotadataset' in client.read)
        self.assertFalse('thisisnotadataset' in client.write)

    def test_dataset_annotations(self):
        """Test creating and updating dataset annotations via the client."""
        ds = self.datastore.load_dataset(self.filestore.upload_file(CSV_FILE))
        client = VizierDBClient(
            datastore=self.datastore,
            datasets={DATASET_NAME: ds.identifier}
        )
        ds = client.get_dataset(DATASET_NAME)
        annotations = ds.annotations
        annotations.add(key='comment', value='Good', column_id=0, row_id=1)
        annotations.add(key='comment', value='Good', column_id=1, row_id=1)
        annotations.add(key='quality', value='Nice', column_id=0, row_id=1)
        ds = client.update_dataset(name=DATASET_NAME, dataset=ds)
        self.assertEqual(len(ds.annotations.cells), 3)
        ds = client.get_dataset(DATASET_NAME)
        self.assertEqual(len(ds.annotations.cells), 3)
        row = ds.rows[1]
        annotations = row.annotations(0)
        for key in ['comment', 'quality']:
            self.assertTrue(key in list(annotations.keys()))
        annotations = row.annotations(1)
        self.assertTrue('comment' in list(annotations.keys()))
        self.assertFalse('quality' in list(annotations.keys()))
        row.set_value(0, 'New Value', clear_annotations=True)
        self.assertEqual(len(ds.annotations.cells), 1)
        ds = client.update_dataset(name=DATASET_NAME, dataset=ds)
        self.assertEqual(len(ds.annotations.cells), 1)
        ds = client.get_dataset(DATASET_NAME)
        self.assertEqual(len(ds.annotations.cells), 1)

    def test_update_existing_dataset(self):
        """Test creating and updating an existing dataset via the client."""
        # Move columns around
        ds = self.datastore.load_dataset(self.filestore.upload_file(CSV_FILE))
        client = VizierDBClient(
            datastore=self.datastore,
            datasets={DATASET_NAME: ds.identifier}
        )
        ds = client.get_dataset(DATASET_NAME)
        col_1 = [row.get_value(1) for row in ds.rows]
        ds.insert_column('empty', 2)
        ds = client.update_dataset(DATASET_NAME, ds)
        col_2 = [row.get_value(2) for row in ds.rows]
        ds.move_column('empty', 1)
        ds = client.update_dataset(DATASET_NAME, ds)
        for i in range(len(ds.rows)):
            row = ds.rows[i]
            self.assertEqual(row.values[1], col_2[i])
            self.assertEqual(row.values[2], col_1[i])
        # Rename a column
        ds.columns[1].name = 'allnone'
        ds = client.update_dataset(DATASET_NAME, ds)
        for i in range(len(ds.rows)):
            row = ds.rows[i]
            self.assertEqual(row.get_value('allnone'), col_2[i])
            self.assertEqual(row.values[2], col_1[i])
        # Insert a row
        row = ds.insert_row()
        row.set_value('Name', 'Zoe')
        ds = client.create_dataset('upd', ds)
        self.assertEqual(len(ds.rows), 3)
        r2 = ds.rows[2]
        self.assertEqual(r2.identifier, 2)
        self.assertEqual(r2.values, ['Zoe', None, None, None])
        # Annotations
        ds = client.get_dataset(DATASET_NAME)
        col = ds.get_column('Age')
        row = ds.rows[0]
        ds.annotations.add(
            column_id=col.identifier,
            row_id=row.identifier,
            key='user:comment',
            value='My Comment'
        )
        ds = client.update_dataset(DATASET_NAME, ds)
        annotations = ds.rows[0].annotations('Age').find_all('user:comment')
        self.assertEqual(len(annotations), 1)
        anno = annotations[0]
        self.assertEqual(anno.key, 'user:comment')
        self.assertEqual(anno.value, 'My Comment')
        ds.annotations.add(
            column_id=col.identifier,
            row_id=row.identifier,
            key='user:comment',
            value='Another Comment'
        )
        ds = client.update_dataset(DATASET_NAME, ds)
        annotations = ds.rows[0].annotations('Age').find_all('user:comment')
        self.assertEqual(len(annotations), 2)
        self.assertEqual(
            list(ds.rows[0].annotations('Age').keys()),
            ['user:comment']
        )
        values = [a.value for a in annotations]
        for val in ['My Comment', 'Another Comment']:
            self.assertTrue(val in values)
        anno = ds.rows[0].annotations('Age').find_one('user:comment')
        anno.key = 'user:issue'
        anno.value = 'Some Issue'
        ds = client.update_dataset(DATASET_NAME, ds)
        annotations = ds.rows[0].annotations('Age').find_all('user:comment')
        self.assertEqual(len(annotations), 1)
        keys = list(ds.rows[0].annotations('Age').keys())
        for key in ['user:comment', 'user:issue']:
            self.assertTrue(key in keys)
        values = [
            a.value for a in ds.rows[0].annotations('Age').find_all('user:issue')
        ]
        for val in ['Some Issue']:
            self.assertTrue(val in values)
        ds.annotations.remove(
            column_id=col.identifier,
            row_id=row.identifier,
            key='user:issue',
        )
        ds = client.update_dataset(DATASET_NAME, ds)
        annotations = ds.rows[0].annotations('Age').find_all('user:issue')
        self.assertEqual(len(annotations), 0)
        annotations = ds.rows[0].annotations('Age').find_all('user:comment')
        self.assertEqual(len(annotations), 1)
        # Delete a column
        ds = client.get_dataset(DATASET_NAME)
        ds.delete_column('Age')
        client.update_dataset(DATASET_NAME, ds)
        ds = client.get_dataset(DATASET_NAME)
        names = [col.name.upper() for col in ds.columns]
        self.assertTrue('NAME' in names)
        self.assertFalse('AGE' in names)
        self.assertTrue(DATASET_NAME in client.read)
        self.assertTrue(DATASET_NAME in client.write)
        self.assertFalse('upd' in client.read)
        self.assertTrue('upd' in client.write)
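
# ---------------------------------------------------------------------------
# Sketch of the annotation model exercised above: cell annotations are keyed
# by (column_id, row_id, key), may hold multiple values per key, and are read
# back per row via row.annotations(column):
#
#   ds.annotations.add(
#       column_id=col.identifier, row_id=row.identifier,
#       key='user:comment', value='My Comment'
#   )
#   anno = ds.rows[0].annotations('Age').find_one('user:comment')
#   all_comments = ds.rows[0].annotations('Age').find_all('user:comment')
# ---------------------------------------------------------------------------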
class TestDefaultVizualProcessor(unittest.TestCase):

    def setUp(self):
        """Create an instance of the default vizier processor for an empty
        server directory.
        """
        # Drop the server directory if it exists
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)
        os.makedirs(SERVER_DIR)
        self.datastore = FileSystemDatastore(DATASTORE_DIR)
        self.filestore = FileSystemFilestore(FILESTORE_DIR)

    def tearDown(self):
        """Clean up by dropping the server directory."""
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)

    def count_non_null_values(self, data, column_index):
        """Return the number of values in a column that are not None."""
        count = 0
        for row in data:
            if row[column_index] is not None:
                count += 1
        return count

    def test_query(self):
        """Test running a query for simple chart plots."""
        ds = self.datastore.load_dataset(self.filestore.upload_file(LOAD_FILE))
        view = ChartViewHandle(dataset_name='ABC', x_axis=2)
        view.add_series(1, range_start=25, range_end=30)
        view.add_series(0, range_start=25, range_end=30)
        view.add_series(3, range_start=25, range_end=30)
        data = ChartQuery().exec_query(dataset=ds, view=view)[0]
        self.assertEqual(len(data), 6)
        for row in data:
            self.assertEqual(len(row), 3)
        self.assertTrue(isinstance(data[0][0], int))
        self.assertTrue(isinstance(data[0][1], float))
        # Remove the interval end for one series. This should return all rows
        # starting from index 25
        view = ChartViewHandle(dataset_name='ABC', x_axis=2)
        view.add_series(1, range_start=25, range_end=30)
        view.add_series(0, range_start=25)
        view.add_series(3, range_start=25, range_end=30)
        data = ChartQuery().exec_query(dataset=ds, view=view)[0]
        self.assertEqual(len(data), 29)
        self.assertIsNone(data[28][0])
        self.assertIsNotNone(data[28][1])
        self.assertIsNone(data[28][2])
        for row in data:
            self.assertEqual(len(row), 3)
        # Remove the interval start for another series. The first series will
        # contain 31 values, the second 29, and the third 6
        view = ChartViewHandle(dataset_name='ABC', x_axis=2)
        view.add_series(1, range_end=30)
        view.add_series(0, range_start=25)
        view.add_series(3, range_start=25, range_end=30)
        data = ChartQuery().exec_query(dataset=ds, view=view)[0]
        self.assertEqual(len(data), 31)
        self.assertEqual(self.count_non_null_values(data, 0), 31)
        self.assertEqual(self.count_non_null_values(data, 1), 29)
        self.assertEqual(self.count_non_null_values(data, 2), 6)
        for row in data:
            self.assertEqual(len(row), 3)
        # Without any range constraints the result should contain all 54 rows
        view = ChartViewHandle(dataset_name='ABC', x_axis=2)
        view.add_series(1, label='A')
        view.add_series(0, label='B')
        view.add_series(3)
        data = ChartQuery().exec_query(dataset=ds, view=view)[0]
        self.assertEqual(len(data), 54)
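
# ---------------------------------------------------------------------------
# Note on the range semantics verified by test_query (inferred from the
# assertion counts): range_start and range_end are inclusive row indices, so
# the interval [25, 30] yields 6 values; omitting range_end extends a series
# to the last row (rows 25..53 of the 54-row fixture give 29 values), and
# omitting range_start starts it at row 0 (rows 0..30 give 31 values). With
# no constraints at all, every series covers all 54 rows.
# ---------------------------------------------------------------------------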