def setUp(self):
    """Create empty datastore and file server directories and a Mimir Vizual engine."""
    # Cleanup first
    self.cleanUp()
    self.datastore = MimirDataStore(DATASTORE_DIR)
    self.fileserver = DefaultFileServer(FILESERVER_DIR)
    self.vizual = MimirVizualEngine(self.datastore, self.fileserver)
def test_mimir_client(self):
    """Run tests for default engine and Mimir data store."""
    mimir.initialize()
    self.fs = DefaultFileServer(SERVER_DIR)
    self.ds = MimirDataStore(DATASTORE_DIR)
    self.run_client_tests(
        VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))
    )
    mimir.finalize()
def datastore_init(self, store_type):
    """Test initializing a datastore with existing datasets."""
    self.setup_fileserver()
    ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE))
    if store_type == MEM_DATASTORE:
        self.db = InMemDataStore()
    elif store_type == FS_DATASTORE:
        self.db = FileSystemDataStore(DATASTORE_DIRECTORY)
    elif store_type == MIMIR_DATASTORE:
        self.db = MimirDataStore(DATASTORE_DIRECTORY)
def setUp(self):
    """Create an empty work trails repository."""
    # Create fresh set of directories
    for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]:
        if os.path.isdir(d):
            shutil.rmtree(d)
        os.mkdir(d)
    self.datastore = MimirDataStore(DATASTORE_DIR)
    self.fileserver = DefaultFileServer(FILESERVER_DIR)
    vizual = MimirVizualEngine(self.datastore, self.fileserver)
    self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {ENV.identifier: ENV})
def setUp(self):
    """Create empty data store directory."""
    # Setup file server and upload file
    if os.path.isdir(FILESERVER_DIR):
        shutil.rmtree(FILESERVER_DIR)
    os.mkdir(FILESERVER_DIR)
    self.fileserver = DefaultFileServer(FILESERVER_DIR)
    # Remove directory if it exists
    if os.path.isdir(DATASTORE_DIRECTORY):
        shutil.rmtree(DATASTORE_DIRECTORY)
    os.mkdir(DATASTORE_DIRECTORY)
    self.db = MimirDataStore(DATASTORE_DIRECTORY)
def set_up(self, engine):
    """Create an empty file server repository."""
    # Drop project descriptor directory
    if os.path.isdir(FILESERVER_DIR):
        shutil.rmtree(FILESERVER_DIR)
    # Setup project repository
    self.fs = DefaultFileServer(FILESERVER_DIR)
    if engine == ENGINEENV_DEFAULT:
        self.datastore = FileSystemDataStore(DATASTORE_DIR)
        self.vizual = DefaultVizualEngine(self.datastore, self.fs)
    elif engine == ENGINEENV_MIMIR:
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.vizual = MimirVizualEngine(self.datastore, self.fs)
def set_up(self, store_type):
    """Create empty data store directory."""
    if store_type == MEM_DATASTORE:
        self.db = InMemDataStore()
    else:
        # Remove directory if it exists
        if os.path.isdir(DATASTORE_DIRECTORY):
            shutil.rmtree(DATASTORE_DIRECTORY)
        os.mkdir(DATASTORE_DIRECTORY)
        if store_type == FS_DATASTORE:
            self.db = FileSystemDataStore(DATASTORE_DIRECTORY)
        elif store_type == MIMIR_DATASTORE:
            self.db = MimirDataStore(DATASTORE_DIRECTORY)
class TestLoadMimirDataset(unittest.TestCase):
    def setUp(self):
        """Create empty datastore and file server directories."""
        # Cleanup first
        self.cleanUp()
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)

    def tearDown(self):
        """Clean-up by deleting directories."""
        self.cleanUp()

    def cleanUp(self):
        """Remove datastore and fileserver directory."""
        # Delete directories
        for d in [DATASTORE_DIR, FILESERVER_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def test_load(self):
        """Load every file in LOAD_DIR into the Mimir data store and verify
        that the loaded dataset matches the materialized one.
        """
        # Ignore files that raised errors (or are taking too much time to load)
        ignore_files = ['JSONOUTPUTWIDE.csv']
        data_types = set()
        mimir.initialize()
        for filename in os.listdir(LOAD_DIR):
            if filename in ignore_files:
                continue
            print 'LOAD ' + filename
            filename = os.path.join(LOAD_DIR, filename)
            f_handle = self.fileserver.upload_file(filename)
            ds = self.datastore.load_dataset(f_handle)
            ds_load = self.datastore.get_dataset(ds.identifier)
            for col in ds_load.columns:
                data_types.add(col.data_type)
                print '\t' + col.name_in_rdb + ' AS ' + col.name + '(' + col.data_type + ')'
            print '\t' + str(ds.row_count) + ' row(s)'
            self.assertEquals(len(ds.columns), len(ds_load.columns))
            self.assertEquals(ds.column_counter, ds_load.column_counter)
            self.assertEquals(ds.row_counter, ds_load.row_counter)
            rows = ds.fetch_rows()
            self.assertEquals(ds.row_counter, len(rows))
            self.assertEquals(ds.row_count, len(rows))
            for i in range(len(rows)):
                row = rows[i]
                self.assertEquals(row.identifier, i)
                self.assertEquals(len(row.values), len(ds.columns))
        mimir.finalize()
        print data_types
def set_up_mimir(self):
    """Setup configuration using Mimir engine."""
    env = ExecEnv(
        FileServerConfig().from_dict({'directory': FILESERVER_DIR}),
        identifier=ENGINEENV_MIMIR,
        packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON, PACKAGE_MIMIR]
    ).from_dict({'datastore': {'directory': DATASTORE_DIR}})
    self.ENGINE_ID = env.identifier
    self.set_up()
    self.datastore = MimirDataStore(DATASTORE_DIR)
    self.fileserver = DefaultFileServer(FILESERVER_DIR)
    self.db = FileSystemViztrailRepository(
        VIZTRAILS_DIR,
        {env.identifier: env}
    )
def test_mem_client(self):
    """Run tests for default engine and in-memory data store."""
    self.fs = DefaultFileServer(SERVER_DIR)
    self.ds = InMemDataStore()
    self.run_client_tests(
        VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))
    )
def test_fs_client(self):
    """Run tests for default engine and file server data store."""
    self.fs = DefaultFileServer(SERVER_DIR)
    self.ds = FileSystemDataStore(DATASTORE_DIR)
    self.run_client_tests(
        VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))
    )
class TestMimirAnnotations(unittest.TestCase):
    def setUp(self):
        """Create an empty work trails repository."""
        # Create fresh set of directories
        for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)
            os.mkdir(d)
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)
        vizual = MimirVizualEngine(self.datastore, self.fileserver)
        self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {ENV.identifier: ENV})

    def tearDown(self):
        """Clean up by deleting the directories created by setUp."""
        # Delete directories
        for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def test_annotations(self):
        """Test cell annotations created by the MISSING VALUE lens."""
        # Create new work trail and create dataset from CSV file
        mimir.initialize()
        f_handle = self.fileserver.upload_file(CSV_FILE)
        vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
        self.db.append_workflow_module(
            viztrail_id=vt.identifier,
            command=cmd.load_dataset(f_handle.identifier, DS_NAME)
        )
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
        # Missing Value Lens
        self.db.append_workflow_module(
            viztrail_id=vt.identifier,
            command=cmd.mimir_missing_value(
                DS_NAME,
                ds.column_by_name('AGE').identifier
            )
        )
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
        annos = ds.get_annotations(column_id=1, row_id=2)
        self.assertEquals(len(annos), 2)
        for anno in annos:
            self.assertEquals(anno.key, ANNO_UNCERTAIN)
        mimir.finalize()
class TestUnicodeHandling(unittest.TestCase):
    def tearDown(self):
        """Clean up by deleting the directories created for the test."""
        # Delete directories
        for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def set_up(self):
        """Create an empty work trails repository."""
        # Create fresh set of directories
        for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)
            os.mkdir(d)

    def set_up_default(self):
        """Setup configuration using default Vizual engine."""
        env = ExecEnv(
            FileServerConfig().from_dict({'directory': FILESERVER_DIR}),
            packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON]
        ).from_dict({'datastore': {'directory': DATASTORE_DIR}})
        self.ENGINE_ID = env.identifier
        self.set_up()
        self.datastore = FileSystemDataStore(DATASTORE_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)
        self.db = FileSystemViztrailRepository(
            VIZTRAILS_DIR,
            {env.identifier: env}
        )

    def set_up_mimir(self):
        """Setup configuration using Mimir engine."""
        env = ExecEnv(
            FileServerConfig().from_dict({'directory': FILESERVER_DIR}),
            identifier=ENGINEENV_MIMIR,
            packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON, PACKAGE_MIMIR]
        ).from_dict({'datastore': {'directory': DATASTORE_DIR}})
        self.ENGINE_ID = env.identifier
        self.set_up()
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)
        self.db = FileSystemViztrailRepository(
            VIZTRAILS_DIR,
            {env.identifier: env}
        )

    def test_vt_default(self):
        """Run workflow with default configuration."""
        # Create new work trail and retrieve the HEAD workflow of the default
        # branch
        self.set_up_default()
        self.run_workflow()

    def test_vt_mimir(self):
        """Run workflows for Mimir configurations."""
        # Create new work trail and retrieve the HEAD workflow of the default
        # branch
        mimir.initialize()
        self.set_up_mimir()
        self.run_workflow()
        mimir.finalize()

    def run_workflow(self):
        """Test functionality to execute a Python script that creates a
        dataset containing unicode characters."""
        f_handle = self.fileserver.upload_file(CSV_FILE)
        vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'})
        # LOAD DATASET
        self.db.append_workflow_module(
            viztrail_id=vt.identifier,
            command=cmd.load_dataset(f_handle.identifier, DS_NAME)
        )
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        self.assertFalse(wf.has_error)
        # RUN Python Script
        self.db.append_workflow_module(
            viztrail_id=vt.identifier,
            command=cmd.python_cell(PYTHON_SCRIPT)
        )
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        if wf.has_error:
            print wf.modules[-1].stderr
        self.assertFalse(wf.has_error)
        #print wf.modules[-1].stdout[0]['data']
        ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
        names = set(c.name.upper().replace('_', ' ') for c in ds.columns)
        self.assertEquals(len(names), 4)
        for name in ['DATE', 'IMO CODE', 'PORT OF DEPARTURE', 'PLACE OF RECEIPT']:
            self.assertTrue(name in names)
class TestWorkflows(unittest.TestCase): def tearDown(self): """Clean-up by dropping the MongoDB colelction used by the engine. """ # Delete directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) def set_up(self): """Create an empty work trails repository.""" # Create fresh set of directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) def set_up_default(self): """Setup configuration using default Vizual engine.""" env = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON ]).from_dict({'datastore': { 'directory': DATASTORE_DIR }}) self.ENGINE_ID = env.identifier self.set_up() self.datastore = FileSystemDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {env.identifier: env}) def set_up_mimir(self): """Setup configuration using Mimir engine.""" env = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), identifier=ENGINEENV_MIMIR, packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON, PACKAGE_MIMIR ]).from_dict({'datastore': { 'directory': DATASTORE_DIR }}) self.ENGINE_ID = env.identifier self.set_up() self.datastore = MimirDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {env.identifier: env}) def test_vt_default(self): """Run workflow with default configuration.""" # Create new work trail and retrieve the HEAD workflow of the default # branch self.set_up_default() self.run_python_workflow() self.set_up_default() self.run_mixed_workflow() self.set_up_default() self.run_delete_modules() self.set_up_default() self.run_erroneous_workflow() self.set_up_default() self.run_update_datasets() def test_vt_mimir(self): """Run workflows for Mimir configurations.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() self.set_up_mimir() self.run_python_workflow() self.set_up_mimir() self.run_mixed_workflow() self.set_up_mimir() self.run_delete_modules() self.set_up_mimir() self.run_erroneous_workflow() mimir.finalize() def run_delete_modules(self): """Test deletion of modules.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) #print '(1) CREATE DATASET' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.load_dataset( f_handle.identifier, DS_NAME)) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) col_age = ds.column_by_name('Age') self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, col_age.identifier, 0, '28')) self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, col_age.identifier, 1, '42')) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) ds = DatasetClient( self.datastore.get_dataset(wf.modules[-1].datasets['people'])) self.assertEquals(int(ds.rows[0].get_value('Age')), 28) self.assertEquals(int(ds.rows[1].get_value('Age')), 42) # DELETE UPDATE CELL self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[1].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) ds = DatasetClient( self.datastore.get_dataset(wf.modules[-1].datasets['people'])) 
self.assertEquals(int(ds.rows[0].get_value('Age')), 23) # DELETE LOAD (will introduce error) self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[0].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) # DELETE last remaining module self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[0].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) def run_erroneous_workflow(self): """Test workflow that has errors.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) #print '(1) CREATE DATASET' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.load_dataset( f_handle.identifier, DS_NAME)) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) col_age = ds.column_by_name('Age') self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, col_age.identifier, 0, '28')) # This should create an error because of the invalid column name self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.rename_column( DS_NAME, col_age.identifier, '')) # This should not have any effect self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, col_age.identifier, 0, '29')) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) # Make sure that all workflow modules have a non-negative identifier # and that they are all unique identifier = set() for m in wf.modules: self.assertTrue(m.identifier >= 0) self.assertTrue(not m.identifier in identifier) identifier.add(m.identifier) def run_mixed_workflow(self): """Test functionality to execute a workflow module.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) #print '(1) CREATE DATASET' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.load_dataset( f_handle.identifier, DS_NAME)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text, 'LOAD DATASET people FROM FILE dataset.csv') #print '(2) INSERT ROW' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.insert_row(DS_NAME, 1)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text, 'INSERT ROW INTO people AT POSITION 1') #print '(3) Set name to Bobby and set variables' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(SET_VARIABLES_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text, SET_VARIABLES_PY) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) #print '(4) Set age to 28' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, ds.column_by_name('Age').identifier, 1, '28')) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text.upper(), 'UPDATE PEOPLE SET [AGE,1] = 28') ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) #print '(5) Change Alice to Bob' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.update_cell(DS_NAME, 
ds.column_by_name('Name').identifier, 0, 'Bob')) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text.upper(), 'UPDATE PEOPLE SET [NAME,0] = \'BOB\'') #print '(6) UPDATE DATASET WITH FILTER' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(UPDATE_DATASET_WITH_FILTER_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text, UPDATE_DATASET_WITH_FILTER_PY) self.assertFalse(wf.has_error) # Ensure that all names are Bobby ds = DatasetClient( self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])) age = [23, 28, 32] for i in range(len(ds.rows)): row = ds.rows[i] self.assertEquals(row.get_value('Name'), 'Bobby') self.assertEquals(int(row.get_value('Age')), age[i]) def run_python_workflow(self): """Test functionality to execute a workflow module.""" vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) #print '(1) CREATE DATASET' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(CREATE_DATASET_PY)) # from vizier.database.client import VizierDBClient\nv = VizierDBClient(__vizierdb__) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) self.assertEquals(wf.version, 0) self.assertEquals(len(wf.modules), 1) self.assertTrue(len(wf.modules[0].stdout) == 0) self.assertTrue(len(wf.modules[0].stderr) == 0) self.assertEquals(len(wf.modules[0].datasets), 1) self.assertTrue(DS_NAME in wf.modules[0].datasets) #print '(2) PRINT DATASET' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(PRINT_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertEquals(wf.version, 1) self.assertEquals(len(wf.modules), 2) self.assertTrue(len(wf.modules[0].stdout) == 0) self.assertTrue(len(wf.modules[0].stderr) == 0) self.assertEquals(len(wf.modules[0].datasets), 1) self.assertTrue(DS_NAME in wf.modules[0].datasets) self.assertTrue(len(wf.modules[1].stdout) == 1) self.assertTrue(len(wf.modules[1].stderr) == 0) self.assertEquals(wf.modules[1].stdout[0]['data'], 'Alice\nBob') self.assertEquals(len(wf.modules[1].datasets), 1) self.assertTrue(DS_NAME in wf.modules[1].datasets) ds_id = wf.modules[1].datasets[DS_NAME] #print '(3) UPDATE DATASET' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(UPDATE_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertFalse(wf.has_error) self.assertEquals(wf.version, 2) self.assertEquals(len(wf.modules), 3) self.assertTrue(len(wf.modules[0].stdout) == 0) self.assertTrue(len(wf.modules[0].stderr) == 0) self.assertEquals(len(wf.modules[0].datasets), 1) self.assertTrue(DS_NAME in 
wf.modules[0].datasets) self.assertEquals(wf.modules[0].datasets[DS_NAME], ds_id) self.assertTrue(len(wf.modules[1].stdout) == 1) self.assertTrue(len(wf.modules[1].stderr) == 0) self.assertEquals(wf.modules[1].stdout[0]['data'], 'Alice\nBob') self.assertEquals(len(wf.modules[1].datasets), 1) self.assertTrue(DS_NAME in wf.modules[1].datasets) self.assertEquals(wf.modules[1].datasets[DS_NAME], ds_id) self.assertTrue(len(wf.modules[2].stdout) == 0) self.assertTrue(len(wf.modules[2].stderr) == 0) self.assertEquals(len(wf.modules[2].datasets), 1) self.assertTrue(DS_NAME in wf.modules[2].datasets) self.assertNotEquals(wf.modules[2].datasets[DS_NAME], ds_id) #print '(4) PRINT DATASET' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(PRINT_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertEquals(wf.version, 3) self.assertEquals(len(wf.modules), 4) self.assertEquals(wf.modules[1].stdout[0]['data'], 'Alice\nBob') self.assertTrue(len(wf.modules[3].stdout) == 1) self.assertTrue(len(wf.modules[3].stderr) == 0) self.assertEquals(wf.modules[3].stdout[0]['data'], 'NoName\nNoName') #print '(5) UPDATE DATASET WITH FILTER' self.db.replace_workflow_module( viztrail_id=vt.identifier, module_id=wf.modules[2].identifier, command=cmd.python_cell(UPDATE_DATASET_WITH_FILTER_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertTrue(wf.has_error) self.assertEquals(wf.version, 4) self.assertEquals(len(wf.modules), 4) # print '(6) INSERT SET VARIABLES BEFORE UPDATE' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(SET_VARIABLES_ONLY_PY), before_id=wf.modules[2].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[4].stdout[0]['data'], 'Alice\nBobby') #print '(7) INTRODUCE ERROR' self.db.replace_workflow_module( viztrail_id=vt.identifier, module_id=wf.modules[1].identifier, command=cmd.python_cell(PRINT_UNKNOWN_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertTrue(wf.has_error) # Ensure that the second module has output to stderr self.assertNotEquals(len(wf.modules[1].stderr), 0) # Ensure that the last two modules hav no output (either to STDOUT or # STDERR) for m in wf.modules[2:]: self.assertEquals(len(m.stdout), 0) self.assertEquals(len(m.stderr), 0) #print '(8) FIX ERROR' self.db.replace_workflow_module( viztrail_id=vt.identifier, module_id=wf.modules[1].identifier, command=cmd.python_cell(PRINT_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) 
self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) #print (9) DELETE MODULE UPDATE_DATASET_WITH_FILTER_PY self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[3].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[3].stdout[0]['data'], 'Alice\nBob') def run_update_datasets(self): """Test dropping and renaming of datasets.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.load_dataset( f_handle.identifier, DS_NAME)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertTrue(DS_NAME in wf.modules[-1].datasets) new_name = DS_NAME + '_renamed' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.rename_dataset( DS_NAME, new_name)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertTrue(DS_NAME in wf.modules[0].datasets) self.assertFalse(new_name in wf.modules[0].datasets) self.assertFalse(DS_NAME in wf.modules[-1].datasets) self.assertTrue(new_name in wf.modules[-1].datasets) self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.drop_dataset(new_name)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertFalse(new_name in wf.modules[-1].datasets) self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.drop_dataset(new_name)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) # Delete the Drop Dataset that failed and replace the first drop with # a Python module that prints names self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[-1].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.db.replace_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[-1].identifier, command=cmd.python_cell(""" for row in vizierdb.get_dataset('""" + new_name + """').rows: print row.get_value('Name') """)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].stdout[0]['data'], 'Alice\nBob') self.assertFalse(DS_NAME in wf.modules[-1].datasets) self.assertTrue(new_name in wf.modules[-1].datasets)
class TestMimirLenses(unittest.TestCase): def setUp(self): """Create an empty work trails repository.""" # Create fresh set of directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) self.datastore = MimirDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) vizual = MimirVizualEngine(self.datastore, self.fileserver) self.db = FileSystemViztrailRepository( VIZTRAILS_DIR, {ENV.identifier: ENV} ) def tearDown(self): """Clean-up by dropping the MongoDB colelction used by the engine. """ # Delete directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) def test_domain_lens(self): """Test DOMAIN lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) col_age = ds.column_by_name('Age') self.assertFalse(wf.has_error) # Missing Value Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_domain(DS_NAME, col_age.identifier) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.has_error: print wf.modules[-1].stderr[0] self.assertEquals(wf.modules[-1].command_text.upper(), 'DOMAIN FOR AGE IN PEOPLE') self.assertFalse(wf.has_error) self.assertEquals(len(wf.modules), 2) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) rows = ds.fetch_rows() self.assertNotEquals(rows[2].values[ds.column_index('Age')], '') # Introduce an error. 
Make sure command formating is correct self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_domain('MY DS', 'MY COL') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'DOMAIN FOR \'MY COL\' IN \'MY DS\'') mimir.finalize() def test_geocode_lens(self): """Test GEOCODE lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(GEO_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertFalse(wf.has_error) # Geocode Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_geocode( DS_NAME, 'GOOGLE', house_nr=ds.column_by_name('STRNUMBER').identifier, street=ds.column_by_name('STRNAME').identifier, city=ds.column_by_name('CITY').identifier, state=ds.column_by_name('STATE').identifier ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.has_error: print wf.modules[-1].stderr[0] self.assertEquals(wf.modules[-1].command_text.upper(), 'GEOCODE HOUSE_NUMBER=STRNUMBER,STREET=STRNAME,CITY=CITY,STATE=STATE PEOPLE USING GOOGLE') self.assertFalse(wf.has_error) self.assertEquals(len(wf.modules), 2) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 6) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_geocode( DS_NAME, 'GOOGLE' ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.has_error: print wf.modules[-1].stderr[0] self.assertEquals(wf.modules[-1].command_text.upper(), 'GEOCODE PEOPLE USING GOOGLE') self.assertFalse(wf.has_error) self.assertEquals(len(wf.modules), 3) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 8) mimir.finalize() def test_key_repair_lens(self): """Test KEY REPAIR lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(KEY_REPAIR_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) ds1 = self.datastore.get_dataset(wf.modules[0].datasets[DS_NAME]) # Missing Value Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_key_repair(DS_NAME, ds1.column_by_name('Empid').identifier) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'KEY REPAIR FOR EMPID IN ' + DS_NAME.upper()) # Get dataset ds2 = self.datastore.get_dataset(wf.modules[0].datasets[DS_NAME]) self.assertEquals(ds1.row_count, ds2.row_count) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 4) self.assertEquals(ds.row_count, 2) names = set() empids = set() rowids = set() for row in DatasetClient(dataset=ds).rows: rowids.add(row.identifier) empids.add(int(row.get_value('empid'))) names.add(row.get_value('name')) self.assertTrue(1 in empids) self.assertTrue(2 in rowids) self.assertTrue('Alice' in names) 
self.assertTrue('Carla' in names) # Test error case and command text self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_key_repair('MY DS', 'MY COL') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'KEY REPAIR FOR \'MY COL\' IN \'MY DS\'') mimir.finalize() def test_missing_value_lens(self): """Test MISSING_VALUE lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertFalse(wf.has_error) # Missing Value Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_value(DS_NAME, ds.column_by_name('AGE').identifier) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'MISSING VALUES FOR AGE IN ' + DS_NAME.upper()) self.assertEquals(len(wf.modules), 2) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) rows = ds.fetch_rows() self.assertNotEquals(rows[2].values[ds.column_index('Age')], '') # Annotations annotations = ds.get_annotations(column_id=1, row_id=4) self.assertEquals(len(annotations), 2) # MISSING VALUE Lens with value constraint vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'New Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_value( DS_NAME, ds.column_by_name('AGE').identifier, constraint='> 30') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.has_error: print wf.modules[-1].stderr[0] self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'MISSING VALUES FOR AGE IN ' + DS_NAME.upper() + ' WITH CONSTRAINT > 30') #self.assertEquals(wf.modules[-1].command_text.upper(), 'MISSING VALUES FOR AGE IN ' + DS_NAME.upper()) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) rows = ds.fetch_rows() self.assertTrue(rows[2].values[ds.column_index('Age')] > 30) # Command text in case of error self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_value('MY DS', '?', constraint='A B') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) cmd_text = wf.modules[-1].command_text.upper() expected_text = 'MISSING VALUES FOR ? 
IN \'MY DS\'' + ' WITH CONSTRAINT A B' self.assertEquals(cmd_text, expected_text) mimir.finalize() def test_missing_key_lens(self): """Test MISSING_KEY lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) # Missing Value Lens age_col = ds.columns[ds.column_index('Age')].identifier self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_key(DS_NAME, age_col, missing_only=True) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertEquals(wf.modules[-1].command_text.upper(), 'MISSING KEYS FOR AGE IN ' + DS_NAME.upper()) self.assertFalse(wf.has_error) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 3) rows = ds.fetch_rows() self.assertEquals(len(rows), 24) #self.db.append_workflow_module( # viztrail_id=vt.identifier, # command=cmd.load_dataset(f_handle.identifier, DS_NAME + '2') #) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_key( DS_NAME, ds.columns[ds.column_index('Salary')].identifier, missing_only=True ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 3) rows = ds.fetch_rows() self.assertEquals(len(rows), 55) mimir.finalize() def test_picker_lens(self): """Test PICKER lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(PICKER_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) # Missing Value Lens ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_picker(DS_NAME, [ {'pickFrom': ds.column_by_name('Age').identifier}, {'pickFrom': ds.column_by_name('Salary').identifier} ]) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.modules[-1].has_error: print wf.modules[-1].stderr self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'PICK FROM AGE,SALARY IN ' + DS_NAME.upper()) # Get dataset self.assertEquals(len(wf.modules[-1].datasets), 1) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) columns = [c.name for c in ds.columns] self.assertEquals(len(ds.columns), 5) self.assertTrue('PICK_ONE_AGE_SALARY' in columns) # Pick another column, this time with custom name self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_picker(DS_NAME, [ {'pickFrom': ds.column_by_name('Age').identifier}, {'pickFrom': ds.column_by_name('Salary').identifier} ], pick_as='My Column') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) # Get dataset self.assertEquals(len(wf.modules[-1].datasets), 1) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) columns = [c.name 
for c in ds.columns] self.assertEquals(len(ds.columns), 6) self.assertTrue('PICK_ONE_AGE_SALARY' in columns) self.assertTrue('My Column' in columns) # Pick from a picked column self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_picker(DS_NAME, [ {'pickFrom': ds.column_by_name('Age').identifier}, {'pickFrom': ds.column_by_name('PICK_ONE_AGE_SALARY').identifier} ], pick_as='My Column') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.modules[-1].has_error: print wf.modules[-1].stderr self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'PICK FROM AGE,PICK_ONE_AGE_SALARY AS \'MY COLUMN\' IN ' + DS_NAME.upper()) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) mimir.finalize() def test_schema_matching_lens(self): """Test SCHEMA_MATCHING lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) # Missing Value Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_schema_matching(DS_NAME, [ {'column': 'BDate', 'type': 'int'}, {'column': 'PName', 'type': 'varchar'} ], 'new_' + DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'SCHEMA MATCHING PEOPLE (BDATE INT, PNAME VARCHAR) AS NEW_' + DS_NAME.upper()) # Get dataset self.assertEquals(len(wf.modules[-1].datasets), 2) ds = self.datastore.get_dataset(wf.modules[-1].datasets['new_' + DS_NAME]) self.assertEquals(len(ds.columns), 2) self.assertEquals(ds.row_count, 2) # Error if adding an existing dataset self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_schema_matching( DS_NAME, [{'column': 'BDate', 'type': 'int'}], 'new_' + DS_NAME ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) self.db.replace_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_schema_matching( DS_NAME, [{'column': 'BDate', 'type': 'int'}], 'a_new_' + DS_NAME ), module_id=wf.modules[-1].identifier, ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'SCHEMA MATCHING PEOPLE (BDATE INT) AS A_NEW_' + DS_NAME.upper()) # Error when adding a dataset with an invalid name self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_schema_matching( DS_NAME, [{'column': 'BDate', 'type': 'int'}], 'SOME NAME' ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'SCHEMA MATCHING PEOPLE (BDATE INT) AS \'SOME NAME\'') mimir.finalize() def test_type_inference_lens(self): """Test TYPE INFERENCE lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds1 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) 
self.assertFalse(wf.has_error) # Infer type self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_type_inference(DS_NAME, 0.6) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) print wf.modules[-1].command_text.upper() self.assertEquals(wf.modules[-1].command_text.upper(), 'TYPE INFERENCE FOR COLUMNS IN ' + DS_NAME.upper() + ' WITH PERCENT_CONFORM = 0.6') # Get dataset ds2 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds2.columns), 3) self.assertEquals(ds2.row_count, 7) ds1_rows = ds1.fetch_rows() ds2_rows = ds2.fetch_rows() for i in range(ds2.row_count): self.assertEquals(ds1_rows[i].values, ds2_rows[i].values) mimir.finalize()
# Currently uses the default file server
fileserver = DefaultFileServer(config.fileserver.directory)
# Create datastore for the API. Different execution environments may use
# different data stores. The API needs to be able to serve datasets from all
# of them. Thus, if more than one execution environment is specified we need
# to use a federated datastore. Individual viztrails will create their own
# instances of their respective data store.
datastores = list()
for env_id in config.envs:
    env_conf = config.envs[env_id]
    if env_id == ENGINEENV_DEFAULT:
        datastores.append(FileSystemDataStore(env_conf.datastore.directory))
    elif env_id == ENGINEENV_MIMIR:
        datastores.append(MimirDataStore(env_conf.datastore.directory))
    else:
        raise RuntimeError('unknown execution environment \'' + env_id + '\'')
# Federate data stores if more than one was given
if len(datastores) > 1:
    datastore = FederatedDataStore(datastores)
else:
    datastore = datastores[0]
viztrails = FileSystemViztrailRepository(config.viztrails.directory, config.envs)
# Initialize the Web Service API.
api = VizierWebService(viztrails, datastore, fileserver, config)
# ------------------------------------------------------------------------------
class TestLoadMimirDataset(unittest.TestCase):
    def setUp(self):
        """Create empty datastore and file server directories."""
        # Cleanup first
        self.cleanUp()
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)
        self.vizual = MimirVizualEngine(self.datastore, self.fileserver)

    def tearDown(self):
        """Clean-up by deleting directories."""
        self.cleanUp()

    def cleanUp(self):
        """Remove datastore and fileserver directory."""
        # Delete directories
        for d in [DATASTORE_DIR, FILESERVER_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def test_load(self):
        """Update cells of different data types and verify the resulting
        column types and cell values."""
        mimir.initialize()
        self.update_cell(CSV_FILE, 2, 0, 'int', 10)
        self.update_cell(CSV_FILE, 2, 0, 'int', 10.3, result_type='real')
        self.update_cell(CSV_FILE, 2, 0, 'int', None)
        self.update_cell(CSV_FILE, 3, 0, 'real', 10.3)
        self.update_cell(CSV_FILE, 3, 0, 'real', 10, result_value=10.0)
        self.update_cell(CSV_FILE, 3, 0, 'real', 'A', result_type='varchar')
        self.update_cell(CSV_FILE, 3, 0, 'real', None)
        self.update_cell(CSV_FILE, 4, 0, 'varchar', 'A')
        self.update_cell(CSV_FILE, 4, 0, 'varchar', 10, result_value='10')
        self.update_cell(CSV_FILE, 4, 0, 'varchar', 10.87, result_value='10.87')
        self.update_cell(CSV_FILE, 4, 0, 'varchar', None)
        self.update_cell(CSV_FILE, 8, 0, 'bool', 'False', result_value=False)
        self.update_cell(CSV_FILE, 8, 0, 'bool', '0', result_value=False)
        self.update_cell(CSV_FILE, 8, 0, 'bool', None)
        self.update_cell(CSV_FILE, 8, 1, 'bool', True, result_value=True)
        self.update_cell(CSV_FILE, 8, 1, 'bool', '1', result_value=True)
        self.update_cell(CSV_FILE, 8, 1, 'bool', 'A', result_value='A', result_type='varchar')
        self.update_cell(CSV_FILE, 8, 1, 'bool', 10.87, result_value='10.87', result_type='varchar')
        self.update_cell(CSV_FILE_DT, 1, 0, 'date', '2018-05-09')
        self.update_cell(CSV_FILE_DT, 1, 0, 'date', '20180509', result_value='20180509', result_type='varchar')
        self.update_cell(CSV_FILE_DT, 1, 0, 'date', None)
        self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', '2018-05-09 12:03:22.0000')
        self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', 'ABC', result_value='ABC', result_type='varchar')
        self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', None)
        mimir.finalize()

    def update_cell(self, filename, col, row, data_type, value, result_value=None, result_type=None):
        """Update the value of the given cell. The column data type is
        expected to match the given data type. The optional result value is
        the expected value of the cell in the modified dataset.
        """
        f_handle = self.fileserver.upload_file(filename)
        ds = self.datastore.load_dataset(f_handle)
        #print [c.name_in_rdb + ' AS ' + c.name + '(' + c.data_type + ')' for c in ds.columns]
        self.assertEquals(ds.columns[col].data_type, data_type)
        rows = ds.fetch_rows()
        self.assertNotEquals(rows[row].values[col], value)
        _, ds_id = self.vizual.update_cell(ds.identifier, col, row, value)
        ds = self.datastore.get_dataset(ds_id)
        #print [c.name_in_rdb + ' AS ' + c.name + '(' + c.data_type + ')' for c in ds.columns]
        if result_type is None:
            self.assertEquals(ds.columns[col].data_type, data_type)
        else:
            self.assertEquals(ds.columns[col].data_type, result_type)
        rows = ds.fetch_rows()
        if result_value is None:
            self.assertEquals(rows[row].values[col], value)
        else:
            self.assertEquals(rows[row].values[col], result_value)
        self.fileserver.delete_file(f_handle.identifier)
DATASTORE_DIR = './env/ds'
FILESERVER_DIR = './env/fs'
CSV_FILE = '../data/mimir/Employee.csv' #pick.csv


def cleanUp():
    """Remove datastore and fileserver directory."""
    # Delete directories
    for d in [DATASTORE_DIR, FILESERVER_DIR]:
        if os.path.isdir(d):
            shutil.rmtree(d)


cleanUp()

datastore = MimirDataStore(DATASTORE_DIR)
fileserver = DefaultFileServer(FILESERVER_DIR)
vizual = MimirVizualEngine(datastore, fileserver)

mimir.initialize()

filename = CSV_FILE
print 'LOAD ' + filename
f_handle = fileserver.upload_file(filename)
ds = datastore.load_dataset(f_handle)
ds_load = datastore.get_dataset(ds.identifier)
print [col.name_in_rdb + ' AS ' + col.name + '(' + col.data_type + ')' for col in ds_load.columns]
print str(ds.row_count) + ' row(s)'
rows = ds.fetch_rows()
for i in range(len(rows)):
    row = rows[i]
    print row.values

cleanUp()

ENV = ExecEnv(
    FileServerConfig().from_dict({'directory': FILESERVER_DIR}),
    identifier=ENGINEENV_MIMIR
).from_dict({'datastore': {'directory': DATASTORE_DIR}})

datastore = MimirDataStore(DATASTORE_DIR)
fileserver = DefaultFileServer(FILESERVER_DIR)
vizual = MimirVizualEngine(datastore, fileserver)
db = FileSystemViztrailRepository(VIZTRAILS_DIR, {ENV.identifier: ENV})

mimir.initialize()

vt = db.create_viztrail(ENV.identifier, {'name': 'My Project'})

#
# LOAD DATASET
#
f_handle = fileserver.upload_file(CSV_FILE)
db.append_workflow_module(
    viztrail_id=vt.identifier,
    command=cmd.load_dataset(f_handle.identifier, DS_NAME)
)
class TestDataStore(unittest.TestCase):
    def setUp(self):
        """Create empty data store directory."""
        # Setup file server and upload file
        if os.path.isdir(FILESERVER_DIR):
            shutil.rmtree(FILESERVER_DIR)
        os.mkdir(FILESERVER_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)
        # Remove directory if it exists
        if os.path.isdir(DATASTORE_DIRECTORY):
            shutil.rmtree(DATASTORE_DIRECTORY)
        os.mkdir(DATASTORE_DIRECTORY)
        self.db = MimirDataStore(DATASTORE_DIRECTORY)

    def tearDown(self):
        """Delete data store directory."""
        for d in [DATASTORE_DIRECTORY, FILESERVER_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def test_datastore(self):
        """Test functionality of the file server data store."""
        mimir.initialize()
        ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE))
        self.assertEquals(ds.column_counter, 3)
        self.assertEquals(ds.row_counter, 2)
        self.assertEquals(ds.row_count, 2)
        cols = [
            ('NAME', COL_PREFIX + '0', 'varchar'),
            ('AGE', COL_PREFIX + '1', 'int'),
            ('SALARY', COL_PREFIX + '2', 'varchar')
        ]
        control_rows = [(0, ['Alice', 23, '35K']), (1, ['Bob', 32, '30K'])]
        for column in ds.columns:
            self.validate_column(column, cols[column.identifier])
        self.validate_rowid_column(ds.rowid_column)
        self.validate_rows(ds.fetch_rows(), control_rows)
        # Get dataset and repeat tests
        ds = self.db.get_dataset(ds.identifier)
        self.assertEquals(ds.column_counter, 3)
        self.assertEquals(ds.row_counter, 2)
        self.assertEquals(len(ds.row_ids), 2)
        for column in ds.columns:
            self.validate_column(column, cols[column.identifier])
        self.validate_rowid_column(ds.rowid_column)
        self.validate_rows(ds.fetch_rows(), control_rows)
        # Create dataset
        names = ['NAME', 'AGE', 'SALARY']
        rows = ds.fetch_rows()
        rows[0].values[0] = 'Jane'
        rows = [rows[1], rows[0]]
        ds = self.db.create_dataset(columns=ds.columns, rows=rows)
        ds = self.db.get_dataset(ds.identifier)
        for i in range(3):
            col = ds.columns[i]
            self.assertEquals(col.identifier, i)
            self.assertEquals(col.name, names[i])
        rows = ds.fetch_rows()
        for i in range(len(rows)):
            row = rows[(len(rows) - 1) - i]
            self.assertEquals(row.identifier, i)
        self.assertEquals(rows[1].values[0], 'Jane')
        # DONE
        mimir.finalize()

    def validate_column(self, column, col_props):
        """Validate that column name and data type are as expected."""
        name, name_in_rdb, data_type = col_props
        self.assertEquals(column.name, name)
        self.assertEquals(column.name_in_rdb, name_in_rdb)
        self.assertEquals(column.data_type, data_type)

    def validate_rowid_column(self, col):
        """Ensure the row id column has the correct name and a data type."""
        self.assertEquals(col.name, col.name_in_rdb)
        self.assertEquals(col.name, ROW_ID)
        self.assertEquals(col.data_type, 'int')

    def validate_rows(self, dataset_rows, control_rows):
        """Make sure all data is read correctly."""
        self.assertEquals(len(dataset_rows), len(control_rows))
        for i in range(len(dataset_rows)):
            ds_row = dataset_rows[i]
            row_id, values = control_rows[i]
            self.assertEquals(ds_row.identifier, row_id)
            self.assertEquals(ds_row.values, values)
class TestDatasetPaginationReader(unittest.TestCase):
    def set_up(self, engine):
        """Create an empty file server repository."""
        # Drop project descriptor directory
        if os.path.isdir(FILESERVER_DIR):
            shutil.rmtree(FILESERVER_DIR)
        # Setup project repository
        self.fs = DefaultFileServer(FILESERVER_DIR)
        if engine == ENGINEENV_DEFAULT:
            self.datastore = FileSystemDataStore(DATASTORE_DIR)
            self.vizual = DefaultVizualEngine(self.datastore, self.fs)
        elif engine == ENGINEENV_MIMIR:
            self.datastore = MimirDataStore(DATASTORE_DIR)
            self.vizual = MimirVizualEngine(self.datastore, self.fs)

    def tear_down(self, engine):
        """Clean-up by dropping file server directory."""
        # Drop data store directory
        if os.path.isdir(DATASTORE_DIR):
            shutil.rmtree(DATASTORE_DIR)
        # Drop project descriptor directory
        if os.path.isdir(FILESERVER_DIR):
            shutil.rmtree(FILESERVER_DIR)

    def test_default_engine(self):
        """Test functionality for the default setup."""
        self.run_tests(ENGINEENV_DEFAULT)

    def test_mimir_engine(self):
        """Test functionality for the Mimir setup."""
        import vistrails.packages.mimir.init as mimir
        mimir.initialize()
        self.run_tests(ENGINEENV_MIMIR)
        mimir.finalize()

    def run_tests(self, engine):
        """Run sequence of tests for given configuration."""
        self.set_up(engine)
        ds = self.vizual.load_dataset(self.fs.upload_file(CSV_FILE).identifier)
        rows = ds.fetch_rows()
        self.assertEquals(len(rows), 7)
        rows = ds.fetch_rows(offset=1)
        self.assertEquals(len(rows), 6)
        self.assertEquals(rows[0].values[0], 'Bob')
        self.assertEquals(rows[5].values[0], 'Gertrud')
        rows = ds.fetch_rows(limit=2)
        self.assertEquals(len(rows), 2)
        self.assertEquals(rows[0].values[0], 'Alice')
        self.assertEquals(rows[1].values[0], 'Bob')
        rows = ds.fetch_rows(offset=4, limit=3)
        self.assertEquals(len(rows), 3)
        self.assertEquals(rows[0].values[0], 'Eileen')
        self.assertEquals(rows[2].values[0], 'Gertrud')
        rows = ds.fetch_rows(offset=5, limit=3)
        self.assertEquals(len(rows), 2)
        self.assertEquals(rows[0].values[0], 'Frank')
        self.assertEquals(rows[1].values[0], 'Gertrud')
        rows = ds.fetch_rows(offset=6, limit=3)
        self.assertEquals(len(rows), 1)
        self.assertEquals(rows[0].values[0], 'Gertrud')
        # Test larger dataset with deletes
        ds = self.vizual.load_dataset(self.fs.upload_file(TSV_FILE).identifier)
        rows = ds.fetch_rows(offset=0, limit=10)
        self.assertEquals(len(rows), 10)
        self.assertEquals([r.identifier for r in rows], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        _, id1 = self.vizual.delete_row(ds.identifier, 2)  # ID=2
        _, id2 = self.vizual.delete_row(id1, 4)  # ID=5
        ds = self.datastore.get_dataset(id2)
        rows = ds.fetch_rows(offset=0, limit=10)
        self.assertEquals([r.identifier for r in rows], [0, 1, 3, 4, 6, 7, 8, 9, 10, 11])
        _, id1 = self.vizual.move_row(ds.identifier, 9, 1)  # ID=11
        _, id2 = self.vizual.move_row(id1, 9, 1)  # ID=10
        ds = self.datastore.get_dataset(id2)
        rows = ds.fetch_rows(offset=1, limit=10)
        self.assertEquals([r.identifier for r in rows], [10, 11, 1, 3, 4, 6, 7, 8, 9, 12])
        rows = ds.fetch_rows(offset=2, limit=10)
        self.assertEquals([r.identifier for r in rows], [11, 1, 3, 4, 6, 7, 8, 9, 12, 13])
        rows = ds.fetch_rows(offset=3, limit=10)
        self.assertEquals([r.identifier for r in rows], [1, 3, 4, 6, 7, 8, 9, 12, 13, 14])
        self.tear_down(engine)
class TestDataStore(unittest.TestCase): def setup_fileserver(self): """Create a fresh file server.""" if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) os.mkdir(FILESERVER_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) def set_up(self, store_type): """Create empty data store directory.""" if store_type == MEM_DATASTORE: self.db = InMemDataStore() else: # Remove directory if it exists if os.path.isdir(DATASTORE_DIRECTORY): shutil.rmtree(DATASTORE_DIRECTORY) os.mkdir(DATASTORE_DIRECTORY) if store_type == FS_DATASTORE: self.db = FileSystemDataStore(DATASTORE_DIRECTORY) elif store_type == MIMIR_DATASTORE: self.db = MimirDataStore(DATASTORE_DIRECTORY) def tear_down(self, store_type): """Delete data store directory. """ for d in [DATASTORE_DIRECTORY, FILESERVER_DIR]: if os.path.isdir(d): shutil.rmtree(d) def test_federated_datastore(self): """Test functionality of the federated data store.""" self.setup_fileserver() store1 = InMemDataStore() store2 = InMemDataStore() fh = self.fileserver.upload_file(CSV_FILE) ds1 = store1.load_dataset(fh) ds2 = store2.load_dataset(fh) fed_store = FederatedDataStore([store1, store2]) self.assertIsNotNone(fed_store.get_dataset(ds1.identifier)) self.assertIsNotNone(fed_store.get_dataset(ds2.identifier)) self.assertIsNone(fed_store.get_dataset('UNDEFINED')) with self.assertRaises(NotImplementedError): fed_store.load_dataset(fh) self.assertIsNotNone(fed_store.update_annotation(ds1.identifier, column_id=0, key='name', value='My Name')) self.assertIsNotNone(fed_store.update_annotation(ds2.identifier, column_id=0, key='name', value='My Name')) self.assertIsNone(fed_store.update_annotation('UNDEFINED', column_id=0, key='name', value='My Name')) def test_fs_datastore(self): """Run test for file system datastore.""" self.run_tests(FS_DATASTORE) def test_mem_datastore(self): """Run test for in-memory datastore.""" self.run_tests(MEM_DATASTORE) def test_mimir_datastore(self): """Run test for Mimir datastore.""" mimir.initialize() self.run_tests(MIMIR_DATASTORE) self.set_up(MIMIR_DATASTORE) self.load_tsv() self.tear_down(MIMIR_DATASTORE) mimir.finalize() def test_volatile_datastore(self): """Test volatile data store on top of a file system data store.""" self.set_up(FS_DATASTORE) self.setup_fileserver() ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) ds_rows = ds.fetch_rows() self.assertEquals(len(ds_rows), ds.row_count) v_store = VolatileDataStore(self.db) # Make sure the existing dataset is accessible via the volatile store v_ds = DatasetClient(dataset=v_store.get_dataset(ds.identifier)) self.assertIsNotNone(v_ds) self.assertEquals(v_ds.get_cell('Salary', 1), '30K') # Create an updated dataset. The original should be the same in both # stores v_ds.rows[1].set_value('Salary', '40K') v_ds = v_store.create_dataset(columns=v_ds.columns, rows=v_ds.rows) self.assertEquals(DatasetClient(dataset=self.db.get_dataset(ds.identifier)).get_cell('Salary', 1), '30K') self.assertEquals(DatasetClient(dataset=v_store.get_dataset(ds.identifier)).get_cell('Salary', 1), '30K') self.assertEquals(DatasetClient(dataset=v_store.get_dataset(v_ds.identifier)).get_cell('Salary', 1), '40K') self.assertIsNone(self.db.get_dataset(v_ds.identifier)) # Delete both datasets. The volatile store is empty. The original should # be unchanged. 
self.assertTrue(v_store.delete_dataset(ds.identifier)) self.assertTrue(v_store.delete_dataset(v_ds.identifier)) self.assertFalse(v_store.delete_dataset(ds.identifier)) self.assertFalse(v_store.delete_dataset(v_ds.identifier)) self.assertIsNone(v_store.get_dataset(ds.identifier)) self.assertIsNone(v_store.get_dataset(v_ds.identifier)) self.assertEquals(DatasetClient(dataset=self.db.get_dataset(ds.identifier)).get_cell('Salary', 1), '30K') self.tear_down(FS_DATASTORE) def run_tests(self, store_type): """Run sequence of tests for given data store type.""" self.set_up(store_type) self.dataset_life_cycle() self.tear_down(store_type) self.set_up(store_type) self.datastore_init(store_type) self.tear_down(store_type) self.set_up(store_type) self.dataset_read() self.tear_down(store_type) self.set_up(store_type) self.dataset_column_index() self.tear_down(store_type) def datastore_init(self, store_type): """Test initializing a datastore with existing datasets.""" self.setup_fileserver() ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) if store_type == MEM_DATASTORE: self.db = InMemDataStore() elif store_type == FS_DATASTORE: self.db = FileSystemDataStore(DATASTORE_DIRECTORY) elif store_type == MIMIR_DATASTORE: self.db = MimirDataStore(DATASTORE_DIRECTORY) def dataset_column_index(self): """Test the column by id index of the dataset handle.""" self.setup_fileserver() ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) # Ensure that the loaded dataset has the expected three columns self.assertEquals(ds.column_by_id(0).name.upper(), 'NAME') self.assertEquals(ds.column_by_id(1).name.upper(), 'AGE') self.assertEquals(ds.column_by_id(2).name.upper(), 'SALARY') with self.assertRaises(ValueError): ds.column_by_id(5) ds.columns.append(DatasetColumn(identifier=5, name='NEWNAME')) self.assertEquals(ds.column_by_id(5).name.upper(), 'NEWNAME') with self.assertRaises(ValueError): ds.column_by_id(4) def dataset_life_cycle(self): """Test create and delete dataset.""" self.setup_fileserver() ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) # Ensure that the project data has three columns and two rows self.assertEquals(len(ds.columns), 3) self.assertEquals(len(ds.fetch_rows()), 2) self.assertEquals(ds.row_count, 2) # Delete dataset and ensure that the dataset directory no longer exists self.assertTrue(self.db.delete_dataset(ds.identifier)) self.assertFalse(self.db.delete_dataset(ds.identifier)) def dataset_read(self): """Test reading a dataset.""" self.setup_fileserver() dh = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) ds = self.db.get_dataset(dh.identifier) ds_rows = ds.fetch_rows() self.assertEquals(dh.identifier, ds.identifier) self.assertEquals(len(dh.columns), len(ds.columns)) self.assertEquals(len(dh.fetch_rows()), len(ds_rows)) self.assertEquals(dh.row_count, len(ds_rows)) # Name,Age,Salary # Alice,23,35K # Bob,32,30K self.assertEquals(ds.column_index('Name'), 0) self.assertEquals(ds.column_index('Age'), 1) self.assertEquals(ds.column_index('Salary'), 2) row = ds_rows[0] self.assertEquals(row.values[0], 'Alice') self.assertEquals(int(row.values[1]), 23) self.assertEquals(row.values[2], '35K') row = ds_rows[1] self.assertEquals(row.values[0], 'Bob') self.assertEquals(int(row.values[1]), 32) self.assertEquals(row.values[2], '30K') def load_tsv(self): """Test loading a dataset from a tab-delimited file.""" self.setup_fileserver() fh = self.fileserver.upload_file(TSV_FILE) ds = self.db.load_dataset(fh)
self.assertEquals(len(ds.columns), 3) self.assertEquals(ds.row_count, 2)
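# --- Illustrative sketch (hypothetical, not the actual VolatileDataStore) -----
# The behaviour verified in test_volatile_datastore can be thought of as a
# copy-on-write overlay: reads fall through to the wrapped base store, while
# new datasets and deletions are confined to the volatile layer.
class _VolatileOverlaySketch(object):
    def __init__(self, base):
        self.base = base        # read-only mapping: identifier -> dataset
        self.local = dict()     # datasets created in the volatile layer
        self.deleted = set()    # identifiers hidden by a delete

    def get_dataset(self, identifier):
        if identifier in self.deleted:
            return None
        return self.local.get(identifier, self.base.get(identifier))

    def delete_dataset(self, identifier):
        if self.get_dataset(identifier) is None:
            return False
        self.local.pop(identifier, None)
        self.deleted.add(identifier)
        return True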
class TestVizualEngine(unittest.TestCase): def set_up(self, engine): """Create an empty file server repository.""" # Drop project descriptor directory if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) # Setup project repository self.fs = DefaultFileServer(FILESERVER_DIR) if engine == ENGINEENV_DEFAULT: self.datastore = FileSystemDataStore(DATASTORE_DIR) self.vizual = DefaultVizualEngine(self.datastore, self.fs) elif engine == ENGINEENV_MIMIR: self.datastore = MimirDataStore(DATASTORE_DIR) self.vizual = MimirVizualEngine(self.datastore, self.fs) self.file = self.fs.upload_file(CSV_FILE) def tear_down(self, engine): """Clean-up by dropping file server directory. """ # Drop data store directory if os.path.isdir(DATASTORE_DIR): shutil.rmtree(DATASTORE_DIR) # Drop project descriptor directory if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) def test_default_engine(self): """Test functionality of the default VizUAL engine.""" self.run_engine_tests(ENGINEENV_DEFAULT) def test_mimir_engine(self): """Test functionality of the Mimir VizUAL engine.""" import vistrails.packages.mimir.init as mimir mimir.initialize() self.run_engine_tests(ENGINEENV_MIMIR) mimir.finalize() def run_engine_tests(self, engine): """Run sequence of tests for given engine.""" self.load_dataset(engine) self.insert_column(engine) self.insert_row(engine) self.delete_column(engine) self.delete_row(engine) self.move_column(engine) self.move_row(engine) self.rename_column(engine) self.update_cell(engine) self.filter_columns(engine) self.sort_dataset(engine) self.sequence_of_steps(engine) def delete_column(self, engine): """Test functionality to delete a column.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifiers col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Delete Age column col_id = ds.column_by_name('AGE').identifier col_count, id1 = self.vizual.delete_column(ds.identifier, col_id) del col_ids[1] # Result should indicate that one column was deleted. 
# The identifier of the resulting dataset should differ from the identifier of the original dataset self.assertEquals(col_count, 1) self.assertNotEquals(id1, ds.identifier) # Retrieve modified dataset and ensure that it contains the following data: # # Name, Salary # ------------ # Alice, 35K # Bob, 30K ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() # Schema is Name, Salary self.assertEquals(len(ds.columns), 2) self.assertEquals(ds.columns[0].name.upper(), 'NAME') self.assertEquals(ds.columns[1].name.upper(), 'SALARY') # Make sure column identifiers haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Make sure that all rows only have two columns row = ds_rows[0] self.assertEquals(len(row.values), 2) self.assertEquals(row.values[0], 'Alice') self.assertEquals(row.values[1], '35K') row = ds_rows[1] self.assertEquals(len(row.values), 2) self.assertEquals(row.values[0], 'Bob') self.assertEquals(row.values[1], '30K') # Ensure that row identifiers haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.delete_column('unknown:uri', 0) self.tear_down(engine) def delete_row(self, engine): """Test functionality to delete a row.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifiers col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Delete second row row_count, id1 = self.vizual.delete_row(ds.identifier, 1) del row_ids[1] # Result should indicate that one row was deleted. 
# The identifier of the resulting dataset should differ from the identifier of the original dataset self.assertEquals(row_count, 1) self.assertNotEquals(id1, ds.identifier) # Retrieve modified dataset and ensure that it contains the following # data: # # Name, Age, Salary # ------------ # Alice, 23, 35K ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() # Schema is Name, Age, Salary col_names = ['Name', 'Age', 'Salary'] self.assertEquals(len(ds.columns), len(col_names)) for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].name.upper(), col_names[i].upper()) # Make sure column identifiers haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # There should only be one row self.assertEquals(len(ds_rows), 1) # Ensure that row identifiers haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.delete_row('unknown:uri', 1) # Ensure exception is thrown if row index is out of bounds with self.assertRaises(ValueError): self.vizual.delete_row(ds.identifier, 100) self.tear_down(engine) def filter_columns(self, engine): """Test projection of a dataset.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) count, ds_id = self.vizual.filter_columns(ds.identifier, [2, 0], ['BD', None]) ds = self.datastore.get_dataset(ds_id) self.assertEquals(len(ds.columns), 2) self.assertEquals(ds.columns[0].identifier, 2) self.assertEquals(ds.columns[0].name.upper(), 'BD') self.assertEquals(ds.columns[1].identifier, 0) self.assertEquals(ds.columns[1].name.upper(), 'NAME') rows = ds.fetch_rows() self.assertEquals(rows[0].values, ['35K', 'Alice']) self.assertEquals(rows[1].values, ['30K', 'Bob']) with self.assertRaises(ValueError): self.vizual.filter_columns(ds.identifier, [0, 1], ['BD', None]) self.tear_down(engine) def insert_column(self, engine): """Test functionality to insert a column.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifiers col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Insert column at position 1 col_ids.insert(1, ds.column_counter) col_count, id1 = self.vizual.insert_column(ds.identifier, 1, 'Height') # Result should indicate that one column was inserted. The identifier of # the resulting dataset should differ from the identifier of the # original dataset self.assertEquals(col_count, 1) self.assertNotEquals(id1, ds.identifier) # Retrieve dataset and ensure that it has the following schema: # Name, Height, Age, Salary ds = self.datastore.get_dataset(id1) col_names = ['Name', 'Height', 'Age', 'Salary'] # Ensure that there are four columns self.assertEquals(len(ds.columns), len(col_names)) for i in range(len(col_names)): col = ds.columns[i] self.assertEquals(col.identifier, col_ids[i]) self.assertEquals(col.name.upper(), col_names[i].upper()) # Insert column at last position col_ids.append(ds.column_counter) col_names.append('Weight') col_count, id2 = self.vizual.insert_column(id1, 4, 'Weight') # Result should indicate that one column was inserted. 
# The identifier of the resulting dataset should differ from the identifier of the previous dataset self.assertEquals(col_count, 1) self.assertNotEquals(id1, id2) # Retrieve dataset and ensure that it has the following schema: # Name, Height, Age, Salary, Weight ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() # Ensure that there are five columns self.assertEquals(len(ds.columns), len(col_names)) for i in range(len(col_names)): col = ds.columns[i] self.assertEquals(col.identifier, col_ids[i]) self.assertEquals(col.name.upper(), col_names[i].upper()) # The cell values for new columns are None; all other values are not None for row in ds_rows: for i in range(len(ds.columns)): if i == 1 or i == 4: self.assertTrue(is_null(row.values[i])) else: self.assertFalse(is_null(row.values[i])) # Ensure that row identifiers haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.insert_column('unknown:uri', 1, 'Height') # A multi-word column name is valid; an exception is thrown only if the name contains invalid characters self.vizual.insert_column(ds.identifier, 1, 'Height from ground') with self.assertRaises(ValueError): self.vizual.insert_column(ds.identifier, 1, 'Height from ground!@#') # Ensure exception is thrown if column position is out of bounds with self.assertRaises(ValueError): self.vizual.insert_column(ds.identifier, 100, 'Height') self.tear_down(engine) def insert_row(self, engine): """Test functionality to insert a row.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifiers col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Insert row at index position 1 row_ids.insert(1, ds.row_counter) # Result should indicate that one row was inserted. 
# The identifier of the resulting dataset should differ from the identifier of the original dataset row_count, id1 = self.vizual.insert_row(ds.identifier, 1) self.assertEquals(row_count, 1) self.assertNotEquals(id1, ds.identifier) # Retrieve modified dataset ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() # Ensure that there are three rows self.assertEquals(len(ds_rows), 3) # The second row has empty values for each column row = ds_rows[1] self.assertEquals(len(row.values), len(ds.columns)) for i in range(len(ds.columns)): self.assertTrue(is_null(row.values[i])) # Append row at the end of the current dataset row_ids.append(ds.row_counter) row_count, id2 = self.vizual.insert_row(id1, 3) self.assertEquals(row_count, 1) self.assertNotEquals(id1, id2) ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() # Ensure that there are four rows self.assertEquals(len(ds_rows), 4) # The next to last row has non-empty values for each column row = ds_rows[2] self.assertEquals(len(row.values), len(ds.columns)) for i in range(len(ds.columns)): self.assertFalse(is_null(row.values[i])) # The last row has empty values for each column row = ds_rows[3] self.assertEquals(len(row.values), len(ds.columns)) for i in range(len(ds.columns)): self.assertTrue(is_null(row.values[i])) # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifiers haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.insert_row('unknown:uri', 1) # Ensure exception is thrown if row index is out of bounds with self.assertRaises(ValueError): self.vizual.insert_row(ds.identifier, 5) # Ensure no exception is raised self.vizual.insert_row(ds.identifier, 4) self.tear_down(engine) def load_dataset(self, engine): """Test functionality to load a dataset.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() self.assertEquals(len(ds.columns), 3) self.assertEquals(len(ds_rows), 2) for row in ds_rows: self.assertTrue(isinstance(row.values[1], int)) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.load_dataset('unknown:uri') self.tear_down(engine) def move_column(self, engine): """Test functionality to move a column.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifiers col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Swap first two columns c = col_ids[0] del col_ids[0] col_ids.insert(1, c) col_count, id1 = self.vizual.move_column( ds.identifier, ds.column_by_name('Name').identifier, 1) self.assertEquals(col_count, 1) self.assertNotEquals(id1, ds.identifier) ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Name'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) row = ds_rows[0] self.assertEquals(row.values[0], 23) self.assertEquals(row.values[1], 'Alice') self.assertEquals(row.values[2], '35K') row = ds_rows[1] self.assertEquals(row.values[0], 32) self.assertEquals(row.values[1], 'Bob') self.assertEquals(row.values[2], '30K') # Ensure that row ids haven't changed 
for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Swap last two columns c = col_ids[1] del col_ids[1] col_ids.append(c) col_count, id2 = self.vizual.move_column( id1, ds.column_by_name('Salary').identifier, 1) ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Salary'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Name'.upper()) row = ds_rows[0] self.assertEquals(row.values[0], 23) self.assertEquals(row.values[1], '35K') self.assertEquals(row.values[2], 'Alice') row = ds_rows[1] self.assertEquals(row.values[0], 32) self.assertEquals(row.values[1], '30K') self.assertEquals(row.values[2], 'Bob') # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Raise error if source column is out of bounds with self.assertRaises(ValueError): self.vizual.move_column(id2, 40, 1) # Raise error if target position is out of bounds with self.assertRaises(ValueError): self.vizual.move_column(id2, ds.column_by_name('Name').identifier, -1) with self.assertRaises(ValueError): self.vizual.move_column(id2, ds.column_by_name('Name').identifier, 4) self.tear_down(engine) def move_row(self, engine): """Test functionality to move a row.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Swap first two rows row_ids = [row for row in reversed(row_ids)] row_count, id1 = self.vizual.move_row(ds.identifier, 0, 1) self.assertEquals(row_count, 1) self.assertNotEquals(id1, ds.identifier) ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Name'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) row = ds_rows[0] self.assertEquals(row.values[0], 'Bob') self.assertEquals(row.values[1], 32) self.assertEquals(row.values[2], '30K') row = ds_rows[1] self.assertEquals(row.values[0], 'Alice') self.assertEquals(row.values[1], 23) self.assertEquals(row.values[2], '35K') # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Swap last two rows row_ids = [row for row in reversed(row_ids)] row_count, id2 = self.vizual.move_row(id1, 1, 0) ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Name'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) row = ds_rows[0] self.assertEquals(row.values[0], 'Alice') self.assertEquals(row.values[1], 23) self.assertEquals(row.values[2], '35K') row = ds_rows[1] self.assertEquals(row.values[0], 'Bob') self.assertEquals(row.values[1], 32) self.assertEquals(row.values[2], '30K') # Ensure that row ids haven't changed for i in 
range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Move first row to the end row_count, id3 = self.vizual.move_row(id2, 0, 2) row_ids = [row for row in reversed(row_ids)] ds = self.datastore.get_dataset(id3) ds_rows = ds.fetch_rows() row = ds_rows[0] self.assertEquals(row.values[0], 'Bob') self.assertEquals(row.values[1], 32) self.assertEquals(row.values[2], '30K') row = ds_rows[1] self.assertEquals(row.values[0], 'Alice') self.assertEquals(row.values[1], 23) self.assertEquals(row.values[2], '35K') # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Raise error if source row is out of bounds with self.assertRaises(ValueError): self.vizual.move_row(id2, 3, 1) # Raise error if target position is out of bounds with self.assertRaises(ValueError): self.vizual.move_row(id2, 0, -1) with self.assertRaises(ValueError): self.vizual.move_row(id2, 1, 4) self.tear_down(engine) def rename_column(self, engine): """Test functionality to rename a column.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Rename first column to Firstname col_count, id1 = self.vizual.rename_column( ds.identifier, ds.column_by_name('Name').identifier, 'Firstname') self.assertEquals(col_count, 1) self.assertNotEquals(id1, ds.identifier) ds = self.datastore.get_dataset(id1) self.assertEquals(ds.columns[0].name.upper(), 'Firstname'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) col_count, id2 = self.vizual.rename_column( id1, ds.column_by_name('Age').identifier, 'BDate') ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Firstname'.upper()) self.assertEquals(ds.columns[1].name, 'BDate') self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.rename_column('unknown:uri', 0, 'Firstname') # Ensure exception is thrown for invalid column id with self.assertRaises(ValueError): self.vizual.rename_column(id2, 500, 'BDate') self.tear_down(engine) def sequence_of_steps(self, engine): """Test sequence of calls that modify a dataset.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) count, ds_id = self.vizual.insert_row(ds.identifier, 1) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.insert_column(ds_id, 3, 'HDate') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('HDate').identifier, 0, '180') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('HDate').identifier, 1, '160') ds = 
self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.rename_column( ds_id, ds.column_by_name('HDate').identifier, 'Height') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('Height').identifier, 2, '170') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.move_row(ds_id, 1, 2) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('Name').identifier, 2, 'Carla') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('Age').identifier, 2, '45') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('Salary').identifier, 2, '56K') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.move_column( ds_id, ds.column_by_name('Salary').identifier, 4) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.delete_column( ds_id, ds.column_by_name('Age').identifier) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.delete_row(ds_id, 0) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.delete_row(ds_id, 0) ds = self.datastore.get_dataset(ds_id) ds_rows = ds.fetch_rows() names = ['Name', 'Height', 'Salary'] self.assertEquals(len(ds.columns), len(names)) for i in range(len(names)): col = ds.columns[i] self.assertEquals(col.name.upper(), names[i].upper()) self.assertEquals([col.identifier for col in ds.columns], [0, 3, 2]) self.assertEquals(len(ds_rows), 1) self.assertEquals(ds_rows[0].values, ['Carla', '160', '56K']) self.assertEquals(ds_rows[0].identifier, 2) self.tear_down(engine) def sort_dataset(self, engine): """Test sorting a dataset.""" self.set_up(engine) # Create a new dataset fh = self.fs.upload_file(SORT_FILE) ds = self.vizual.load_dataset(fh.identifier) count, ds_id = self.vizual.sort_dataset(ds.identifier, [1, 2, 0], [False, False, True]) ds = self.datastore.get_dataset(ds_id) rows = ds.fetch_rows() names = ['Alice', 'Bob', 'Dave', 'Gertrud', 'Frank'] result = list() for row in rows: name = row.values[0] if name in names: result.append(name) for i in range(len(names)): self.assertEquals(names[i], result[i]) count, ds_id = self.vizual.sort_dataset(ds.identifier, [2, 1, 0], [True, False, True]) ds = self.datastore.get_dataset(ds_id) rows = ds.fetch_rows() names = ['Gertrud', 'Frank', 'Bob', 'Alice', 'Dave'] result = list() for row in rows: name = row.values[0] if name in names: result.append(name) for i in range(len(names)): self.assertEquals(names[i], result[i]) self.tear_down(engine) def update_cell(self, engine): """Test functionality to update a dataset cell.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Update cell [0, 0]. Ensure that one row was updated and a new # identifier is generated. 
# Also ensure that the resulting dataset has the new value in cell [0, 0] upd_rows, id1 = self.vizual.update_cell(ds.identifier, 0, 0, 'MyValue') self.assertEquals(upd_rows, 1) self.assertNotEquals(ds.identifier, id1) ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() self.assertEquals(ds_rows[0].values[0], 'MyValue') upd_rows, id2 = self.vizual.update_cell( id1, ds.column_by_name('Name').identifier, 0, 'AValue') ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() self.assertEquals(ds_rows[0].values[0], 'AValue') self.assertEquals(ds_rows[0].values[ds.column_index('Name')], 'AValue') # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifiers haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Set value to None upd_rows, id3 = self.vizual.update_cell( id2, ds.column_by_name('Name').identifier, 0, None) ds = self.datastore.get_dataset(id3) ds_rows = ds.fetch_rows() self.assertIsNone(ds_rows[0].values[0]) self.assertIsNone(ds_rows[0].values[ds.column_index('Name')]) # Ensure exception is thrown if column is unknown with self.assertRaises(ValueError): self.vizual.update_cell(ds.identifier, 100, 0, 'MyValue') # Ensure exception is thrown if row index is out of bounds with self.assertRaises(ValueError): self.vizual.update_cell(ds.identifier, 0, 100, 'MyValue') self.tear_down(engine)
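# Note: is_null() used by the assertions above is imported from the package
# under test. A hypothetical stand-in with the behaviour those assertions rely
# on (treating None and empty strings as null cell values) could look like
# this; it is an illustrative assumption, not the library's definition.
def _is_null_sketch(value):
    return value is None or value == ''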
class TestVizierClient(unittest.TestCase): def setUp(self): """Drop data store and file server directories if they exist.""" # Drop directories self.tearDown() def tearDown(self): """Clean-up by dropping file server directory. """ if os.path.isdir(DATASTORE_DIR): shutil.rmtree(DATASTORE_DIR) if os.path.isdir(SERVER_DIR): shutil.rmtree(SERVER_DIR) def test_fs_client(self): """Run tests for default engine and file system data store.""" self.fs = DefaultFileServer(SERVER_DIR) self.ds = FileSystemDataStore(DATASTORE_DIR) self.run_client_tests( VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))) def test_mem_client(self): """Run tests for default engine and in-memory data store.""" self.fs = DefaultFileServer(SERVER_DIR) self.ds = InMemDataStore() self.run_client_tests( VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))) def test_mimir_client(self): """Run tests for default engine and Mimir data store.""" mimir.initialize() self.fs = DefaultFileServer(SERVER_DIR) self.ds = MimirDataStore(DATASTORE_DIR) self.run_client_tests( VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))) mimir.finalize() def run_client_tests(self, client): """Test creating and updating a dataset via the client.""" ds = DatasetClient() ds.insert_column('Name') ds.insert_column('Age') ds.insert_row(['Alice', '23']) ds.insert_row(['Bob', '25']) client.create_dataset('MyDataset', ds) # Ensure the returned dataset contains the input data ds = client.get_dataset('MyDataset') self.assertEquals([c.name for c in ds.columns], ['Name', 'Age']) self.assertEquals([str(v) for v in ds.rows[0].values], ['Alice', '23']) self.assertEquals([str(v) for v in ds.rows[1].values], ['Bob', '25']) # Update dataset ds.rows[1].set_value('Age', '26') client.update_dataset('MyDataset', ds) ds = client.get_dataset('MyDataset') self.assertEquals([str(v) for v in ds.rows[1].values], ['Bob', '26']) # Value error when creating dataset with existing name with self.assertRaises(ValueError): client.create_dataset('MyDataset', ds) # Value error when retrieving unknown dataset with self.assertRaises(ValueError): client.get_dataset('SomeDataset') # Rename the dataset and ensure it is accessible under the new name client.rename_dataset('MyDataset', 'SomeDataset') ds = client.get_dataset('SomeDataset') client.update_dataset('SomeDataset', ds) # Move columns around ds = self.ds.load_dataset(self.fs.upload_file(CSV_FILE)) ds = client.create_dataset('people', DatasetClient(ds)) col_1 = [row.get_value(1) for row in ds.rows] ds.insert_column('empty', 2) ds = client.update_dataset('people', ds) col_2 = [row.get_value(2) for row in ds.rows] ds.move_column('empty', 1) ds = client.update_dataset('people', ds) for i in range(len(ds.rows)): row = ds.rows[i] self.assertEquals(row.values[1], col_2[i]) self.assertEquals(row.values[2], col_1[i]) # Rename ds.columns[1].name = 'allnone' ds = client.update_dataset('people', ds) for i in range(len(ds.rows)): row = ds.rows[i] self.assertEquals(row.get_value('allnone'), col_2[i]) self.assertEquals(row.values[2], col_1[i]) # Insert row row = ds.insert_row() row.set_value('Name', 'Zoe') ds = client.create_dataset('upd', ds) self.assertEquals(len(ds.rows), 3) r2 = ds.rows[2] self.assertEquals(r2.identifier, 2) self.assertEquals(r2.values, ['Zoe', None, None, None]) # Annotations ds = client.get_dataset('people') annotations = ds.rows[0].annotations('Age') annotations.add('user:comment', 'My Comment') ds = client.update_dataset('people', ds) annotations = ds.rows[0].annotations('Age').find_all('user:comment') 
self.assertEquals(len(annotations), 1) anno = annotations[0] self.assertEquals(anno.key, 'user:comment') self.assertEquals(anno.value, 'My Comment') ds.rows[0].annotations('Age').add('user:comment', 'Another Comment') ds = client.update_dataset('people', ds) annotations = ds.rows[0].annotations('Age').find_all('user:comment') self.assertEquals(len(annotations), 2) self.assertEquals(ds.rows[0].annotations('Age').keys(), ['user:comment']) values = [a.value for a in annotations] for val in ['My Comment', 'Another Comment']: self.assertTrue(val in values) ds.rows[0].annotations('Age').update(identifier=anno.identifier, key='user:issue', value='Some Issue') ds = client.update_dataset('people', ds) annotations = ds.rows[0].annotations('Age').find_all('user:comment') self.assertEquals(len(annotations), 1) keys = ds.rows[0].annotations('Age').keys() for key in ['user:comment', 'user:issue']: self.assertTrue(key in keys) values = [ a.value for a in ds.rows[0].annotations('Age').find_all('user:comment') ] for val in ['Another Comment']: self.assertTrue(val in values) values = [ a.value for a in ds.rows[0].annotations('Age').find_all('user:issue') ] for val in ['Some Issue']: self.assertTrue(val in values) ds.rows[0].annotations('Age').update(identifier=anno.identifier) ds = client.update_dataset('people', ds) annotations = ds.rows[0].annotations('Age').find_all('user:issue') self.assertEquals(len(annotations), 0) annotations = ds.rows[0].annotations('Age').find_all('user:comment') self.assertEquals(len(annotations), 1) # Delete column ds = client.get_dataset('people') ds.delete_column('Age') client.update_dataset('people', ds) ds = client.get_dataset('people') names = [col.name.upper() for col in ds.columns] self.assertTrue('NAME' in names) self.assertFalse('AGE' in names)
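# Conventional entry point so that the module can be executed directly. The
# excerpt does not show the original main guard, so this is assumed.
if __name__ == '__main__':
    unittest.main()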