def test_create_and_delete_branch(self):
    """Create an extra branch, check it survives a reload, then delete it.

    The test expects the new branch to start without a head workflow, to
    have its metadata and properties files on disk, and to disappear from
    both the in-memory handle and the reloaded viztrail once deleted.
    """
    root = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(root)
    trail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=root
    )
    self.assertEqual(len(trail.branches), 1)
    # A freshly created branch has neither a head nor any workflows
    new_branch = trail.create_branch(properties={PROPERTY_NAME: 'My Branch'})
    self.assertEqual(len(trail.branches), 2)
    self.assertIsNone(new_branch.head)
    self.assertEqual(len(new_branch.workflows), 0)
    # The same must hold after reading the viztrail back from disk
    trail = OSViztrailHandle.load_viztrail(root)
    self.assertEqual(len(trail.branches), 2)
    self.assertTrue(new_branch.identifier in trail.branches)
    self.assertEqual(
        trail.get_branch(new_branch.identifier).name,
        'My Branch'
    )
    new_branch = trail.get_branch(new_branch.identifier)
    self.assertIsNone(new_branch.head)
    self.assertEqual(len(new_branch.workflows), 0)
    # The branch folder and both of its object files have to exist
    branch_dir = os.path.join(
        root,
        viztrail.FOLDER_BRANCHES,
        new_branch.identifier
    )
    self.assertTrue(os.path.isdir(branch_dir))
    for object_name in (br.OBJ_METADATA, br.OBJ_PROPERTIES):
        self.assertTrue(os.path.isfile(os.path.join(branch_dir, object_name)))
    # Deleting the branch removes its folder and its index entry
    trail.delete_branch(new_branch.identifier)
    self.assertFalse(os.path.isdir(branch_dir))
    self.assertEqual(len(trail.branches), 1)
    trail = OSViztrailHandle.load_viztrail(root)
    self.assertEqual(len(trail.branches), 1)
def test_load_active(self):
    """Check how modules persisted in an active state look after a reload.

    The test expects modules that were written to disk in a running state
    to be reported as canceled once the viztrail is loaded again.
    """
    root = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(root)
    trail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=root
    )
    branch = trail.get_default_branch()
    # Append five modules, one workflow version per module
    for n in range(5):
        now = get_current_time()
        source = 'print {0}+{0}'.format(n)
        cmd = python_cell(source=source)
        mod = OSModuleHandle.create_module(
            command=cmd,
            external_form=source,
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(n + n))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=now,
                started_at=now,
                finished_at=now
            ),
            module_folder=trail.modules_folder,
            object_store=trail.object_store
        )
        chain = (
            [mod] if branch.head is None else branch.head.modules + [mod]
        )
        branch.append_workflow(
            modules=chain,
            action=ACTION_INSERT,
            command=cmd
        )
        self.assertEqual(len(branch.get_history()), (n + 1))
    # This is a hack to simulate loading workflows with active modules:
    # flip the last two modules of the branch head to a running state and
    # persist them.
    for position in (-2, -1):
        active = branch.get_head().modules[position]
        active.state = MODULE_RUNNING
        active.write_module()
    trail = OSViztrailHandle.load_viztrail(root)
    branch = trail.get_default_branch()
    for position in range(3):
        self.assertTrue(branch.get_head().modules[position].is_success)
    for position in (3, 4):
        self.assertTrue(branch.get_head().modules[position].is_canceled)
    # The second module is the last module of the second workflow; mark it
    # as running and verify that workflow after another reload.
    active = branch.get_head().modules[1]
    active.state = MODULE_RUNNING
    active.write_module()
    trail = OSViztrailHandle.load_viztrail(root)
    branch = trail.get_default_branch()
    second_wf = branch.get_workflow(branch.get_history()[1].identifier)
    self.assertTrue(second_wf.modules[0].is_success)
    self.assertTrue(second_wf.modules[1].is_canceled)
def test_default_branch(self):
    """Exercise the rules that protect and reassign the default branch."""

    def assert_protected(handle, default):
        # Deleting the default branch through the viztrail is expected to
        # raise ValueError; deleting it through the branch handle itself is
        # expected to raise RuntimeError.
        with self.assertRaises(ValueError):
            handle.delete_branch(default.identifier)
        self.assertTrue(default.is_default)
        with self.assertRaises(RuntimeError):
            default.delete_branch()

    root = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(root)
    trail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=root
    )
    self.assertEqual(len(trail.branches), 1)
    first = trail.get_default_branch()
    self.assertTrue(trail.is_default_branch(first.identifier))
    assert_protected(trail, first)
    # Reload to make sure the default branch information was persisted
    trail = OSViztrailHandle.load_viztrail(root)
    self.assertEqual(len(trail.branches), 1)
    first = trail.get_default_branch()
    assert_protected(trail, first)
    # A newly added branch must not take over the default role
    second = trail.create_branch(properties={PROPERTY_NAME: 'My Branch'})
    self.assertFalse(second.is_default)
    self.assertNotEqual(
        trail.get_default_branch().identifier,
        second.identifier
    )
    self.assertFalse(trail.is_default_branch(second.identifier))
    trail = OSViztrailHandle.load_viztrail(root)
    self.assertNotEqual(
        trail.get_default_branch().identifier,
        second.identifier
    )
    # Promote the second branch to be the default
    second = trail.set_default_branch(second.identifier)
    self.assertTrue(second.is_default)
    self.assertFalse(trail.get_branch(first.identifier).is_default)
    self.assertEqual(
        trail.get_default_branch().identifier,
        second.identifier
    )
    # The first branch is no longer the default, so it can be deleted now
    self.assertTrue(trail.delete_branch(first.identifier))
    trail = OSViztrailHandle.load_viztrail(root)
    self.assertIsNone(trail.get_branch(first.identifier))
    self.assertEqual(
        trail.get_default_branch().identifier,
        second.identifier
    )
    # Making an unknown branch the default should raise ValueError
    with self.assertRaises(ValueError):
        trail.set_default_branch(first.identifier)
def test_single_append(self):
    """Append one module to an empty branch and read it back from disk."""
    root = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(root)
    trail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties={},
        base_path=root
    )
    branch = trail.get_default_branch()
    cmd = python_cell(source='print 2+2')
    now = get_current_time()
    stamp = ModuleTimestamp(created_at=now, started_at=now, finished_at=now)
    mod = OSModuleHandle.create_module(
        command=cmd,
        external_form='print 2+2',
        state=MODULE_SUCCESS,
        outputs=ModuleOutputs(stdout=[TextOutput('4')]),
        provenance=ModuleProvenance(),
        timestamp=stamp,
        module_folder=trail.modules_folder,
        object_store=trail.object_store
    )
    workflow = branch.append_workflow(
        modules=[mod],
        action=ACTION_INSERT,
        command=cmd
    )
    # Both the workflow handle and the new module are expected to have a
    # file on disk.
    workflow_file = os.path.join(branch.base_path, workflow.identifier)
    self.assertTrue(os.path.isfile(workflow_file))
    module_file = os.path.join(workflow.modules[-1].module_path)
    self.assertTrue(os.path.isfile(module_file))
    # Reload the viztrail and inspect the last module of the branch head
    trail = OSViztrailHandle.load_viztrail(root)
    mod = trail.get_default_branch().get_head().modules[-1]
    self.assertEqual(mod.external_form, 'print 2+2')
    self.assertEqual(mod.outputs.stdout[-1].value, '4')
def test_create_load_delete(self):
    """Create a viztrail, rename it, reload it and finally delete it."""
    root = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(root)
    trail = OSViztrailHandle.create_viztrail(
        identifier='DEF',
        properties={PROPERTY_NAME: 'My Viztrail'},
        base_path=root
    )
    # All expected folders and files must have been created
    trail_dir = os.path.join(REPO_DIR, 'ABC')
    expected_dirs = [
        trail_dir,
        os.path.join(trail_dir, viztrail.FOLDER_BRANCHES),
        os.path.join(trail_dir, viztrail.FOLDER_MODULES),
    ]
    for folder in expected_dirs:
        self.assertTrue(os.path.isdir(folder))
    expected_files = [
        os.path.join(
            trail_dir,
            viztrail.FOLDER_BRANCHES,
            viztrail.OBJ_BRANCHINDEX
        ),
        os.path.join(trail_dir, viztrail.OBJ_METADATA),
        os.path.join(trail_dir, viztrail.OBJ_PROPERTIES),
    ]
    for filename in expected_files:
        self.assertTrue(os.path.isfile(filename))
    # Change the name property on the in-memory handle
    self.assertEqual(trail.identifier, 'DEF')
    self.assertEqual(trail.name, 'My Viztrail')
    trail.name = 'A Name'
    self.assertEqual(trail.name, 'A Name')
    # The new name has to be visible after loading from disk
    trail = OSViztrailHandle.load_viztrail(root)
    self.assertEqual(trail.identifier, 'DEF')
    self.assertEqual(trail.name, 'A Name')
    # Deleting the viztrail removes its folder
    trail.delete_viztrail()
    self.assertFalse(os.path.exists(trail_dir))
def test_load_with_dataset_delete(self):
    """Reload a workflow whose modules each replace the previous dataset.

    Every module writes dataset 'DS<i>' with i columns and (apart from the
    first module) deletes 'DS<i-1>'. After a reload the database state
    following module i is expected to contain exactly that one dataset.
    """
    root = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(root)
    trail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties={},
        base_path=root
    )
    branch = trail.get_default_branch()
    # Append five modules, one workflow version per module
    for n in range(5):
        now = get_current_time()
        dropped = ['DS' + str(n - 1)] if n > 0 else list()
        source = 'print {0}+{0}'.format(n)
        cmd = python_cell(source=source)
        written = {
            'DS' + str(n): DatasetDescriptor(
                identifier=str(n),
                name='DS' + str(n),
                columns=[
                    DatasetColumn(identifier=col, name=str(col))
                    for col in range(n)
                ],
            )
        }
        mod = OSModuleHandle.create_module(
            command=cmd,
            external_form=source,
            state=MODULE_SUCCESS,
            outputs=ModuleOutputs(stdout=[TextOutput(str(n + n))]),
            provenance=ModuleProvenance(write=written, delete=dropped),
            timestamp=ModuleTimestamp(
                created_at=now,
                started_at=now,
                finished_at=now
            ),
            module_folder=trail.modules_folder,
            object_store=trail.object_store
        )
        chain = (
            [mod] if branch.head is None else branch.head.modules + [mod]
        )
        branch.append_workflow(
            modules=chain,
            action=ACTION_INSERT,
            command=cmd
        )
    trail = OSViztrailHandle.load_viztrail(root)
    head = trail.get_default_branch().get_head()
    self.assertEqual(len(head.modules), 5)
    # Replay the provenance of each module on top of the previous state
    state = {}
    for n in range(5):
        state = head.modules[n].provenance.get_database_state(state)
        self.assertEqual(len(state), 1)
        name = 'DS' + str(n)
        self.assertTrue(name in state)
        self.assertEqual(len(state[name].columns), n)
def __init__(self, base_path: str, object_store: Optional[ObjectStore] = None):
    """Initialize the repository rooted at the given base directory.

    Creates the base directory and the viztrails index object if they do
    not exist yet, then loads every viztrail that is listed in the index
    into the `viztrails` dictionary (keyed by viztrail identifier).

    Parameters
    ----------
    base_path: string
        Path to the base directory for viztrail resources
    object_store: vizier.core.io.base.ObjectStore, optional
        Store for objects that represent viztrail resources. If not given,
        a DefaultObjectStore instance is used.

    Raises
    ------
    ValueError
        If base_path is None.
    """
    # Raise an exception if the base directory argument is not given
    if base_path is None:
        raise ValueError('missing path for base directory')
    # Create the base directory if it does not exist. exist_ok avoids the
    # check-then-create race of a separate os.path.isdir() test.
    self.base_path = base_path
    os.makedirs(self.base_path, exist_ok=True)
    # The object store element is optional. If not given the default
    # object store is used.
    self.object_store: ObjectStore = (
        object_store if object_store is not None else DefaultObjectStore()
    )
    # Initialize the viztrails index. Create the index object (with an
    # empty list as content) if it does not exist.
    self.viztrails_index = self.object_store.join(
        self.base_path,
        OBJ_VIZTRAILINDEX
    )
    if not self.object_store.exists(self.viztrails_index):
        self.object_store.create_object(
            parent_folder=self.base_path,
            identifier=OBJ_VIZTRAILINDEX,
            content=list()
        )
    # Load all viztrails that are listed in the index.
    # NOTE(review): the index is created with list() content above but is
    # cast to a Dict here; iterating yields the identifiers either way --
    # confirm the intended type.
    self.viztrails: Dict[str, OSViztrailHandle] = dict()
    for identifier in cast(
        Dict[str, Any],
        self.object_store.read_object(self.viztrails_index)
    ):
        vt = OSViztrailHandle.load_viztrail(
            base_path=self.object_store.join(self.base_path, identifier),
            object_store=self.object_store
        )
        # We just got the identifier from the repository... the loaded
        # viztrail had better exist.
        assert vt is not None
        self.viztrails[vt.identifier] = vt
def test_completed_append(self):
    """Append a completed workflow to a branch and verify its descriptor.

    Ten single-module inserts are followed by one workflow that drops the
    last module. The resulting head is checked both on the live branch and
    after reloading the viztrail.
    """

    def check_delete_head(head):
        # Expected shape of the workflow created by the final delete step
        self.assertEqual(len(head.modules), 9)
        self.assertEqual(head.descriptor.identifier, '0000000A')
        self.assertEqual(head.descriptor.action, ACTION_DELETE)
        self.assertEqual(head.descriptor.package_id, PACKAGE_PYTHON)
        self.assertEqual(head.descriptor.command_id, PYTHON_CODE)

    root = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(root)
    trail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=root
    )
    branch = trail.get_default_branch()
    for n in range(10):
        now = get_current_time()
        source = 'print {0}+{0}'.format(n)
        cmd = python_cell(source=source)
        mod = OSModuleHandle.create_module(
            command=cmd,
            external_form=source,
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(n + n))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=now,
                started_at=now,
                finished_at=now
            ),
            module_folder=trail.modules_folder,
            object_store=trail.object_store
        )
        chain = (
            [mod] if branch.head is None else branch.head.modules + [mod]
        )
        branch.append_workflow(
            modules=chain,
            action=ACTION_INSERT,
            command=cmd
        )
    # Record a delete of the last module as an eleventh workflow version
    current = branch.get_head().modules
    check_delete_head(
        branch.append_workflow(
            modules=current[:-1],
            action=ACTION_DELETE,
            command=current[-1].command
        )
    )
    # The same head has to come back after loading from disk
    trail = OSViztrailHandle.load_viztrail(root)
    branch = trail.get_default_branch()
    versions = branch.get_history()
    self.assertEqual(len(versions), 11)
    check_delete_head(branch.get_head())
def test_multi_append(self):
    """Append ten modules one by one and verify every workflow version."""
    root = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(root)
    trail = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=root
    )
    branch = trail.get_default_branch()
    # Each iteration adds one module and thereby one workflow version
    for n in range(10):
        now = get_current_time()
        source = 'print {0}+{0}'.format(n)
        cmd = python_cell(source=source)
        mod = OSModuleHandle.create_module(
            command=cmd,
            external_form=source,
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(n + n))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=now,
                started_at=now,
                finished_at=now
            ),
            module_folder=trail.modules_folder,
            object_store=trail.object_store
        )
        chain = (
            [mod] if branch.head is None else branch.head.modules + [mod]
        )
        branch.append_workflow(
            modules=chain,
            action=ACTION_INSERT,
            command=cmd
        )
        self.assertEqual(len(branch.get_history()), (n + 1))
    # After a reload, version number v must hold the first v + 1 modules
    trail = OSViztrailHandle.load_viztrail(root)
    branch = trail.get_default_branch()
    versions = branch.get_history()
    self.assertEqual(len(versions), 10)
    for version in range(10):
        workflow = branch.get_workflow(versions[version].identifier)
        self.assertEqual(len(workflow.modules), (version + 1))
        for pos in range(version + 1):
            mod = workflow.modules[pos]
            self.assertEqual(
                mod.external_form,
                'print ' + str(pos) + '+' + str(pos)
            )
            self.assertEqual(mod.outputs.stdout[-1].value, str(pos + pos))
def test_create_and_delete_branch_with_default_workflow(self):
    """Ensure that creating and loading branches works if the head workflow
    for the new branch is given.

    A branch is created from a list of five module identifiers. The test
    expects the branch to have exactly one workflow (its head) containing
    those modules in order, both right after creation and after reloading
    the viztrail, and expects the viztrail's last-modified timestamp to
    match that of the new branch after the reload.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='DEF',
        properties={PROPERTY_NAME: 'My Viztrail'},
        base_path=base_path
    )
    self.assertEqual(
        vt.last_modified_at,
        vt.default_branch.last_modified_at
    )
    # Create five modules and collect their identifiers
    modules = [
        OSModuleHandle.create_module(
            command=python_cell(source='print ' + str(i)),
            external_form='TEST MODULE ' + str(i),
            state=MODULE_SUCCESS,
            outputs=ModuleOutputs(),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(),
            datasets=dict(),
            module_folder=vt.modules_folder,
        ).identifier
        for i in range(5)
    ]
    branch = vt.create_branch(
        properties={PROPERTY_NAME: 'My Branch'},
        modules=modules
    )
    self.assertIsNotNone(branch.head)
    self.assertEqual(len(branch.workflows), 1)
    # Reload and verify that the head workflow was persisted
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_branch(branch.identifier)
    self.assertIsNotNone(branch.head)
    self.assertEqual(len(branch.workflows), 1)
    wf = branch.get_workflow(branch.head.identifier)
    self.assertEqual(len(wf.modules), 5)
    for i in range(5):
        self.assertEqual(
            wf.modules[i].external_form,
            'TEST MODULE ' + str(i)
        )
    # The original repeated this assertion twice verbatim; once suffices.
    self.assertEqual(vt.last_modified_at, branch.last_modified_at)
def test_load_with_missing_modules(self):
    """Test loading workflows for which a module file is missing on disk.

    Five modules are appended, then the file of the third module is removed
    to simulate a write that did not complete. After reloading the
    viztrail, the test expects the head workflow state and the third
    module to be reported as being in an error state.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties=None,
        base_path=base_path
    )
    branch = vt.get_default_branch()
    # Append five modules, one workflow version per module
    for i in range(5):
        ts = get_current_time()
        command = python_cell(source='print ' + str(i) + '+' + str(i))
        module = OSModuleHandle.create_module(
            command=command,
            external_form='print ' + str(i) + '+' + str(i),
            state=MODULE_SUCCESS,
            datasets=dict(),
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(
                created_at=ts,
                started_at=ts,
                finished_at=ts
            ),
            module_folder=vt.modules_folder,
            object_store=vt.object_store
        )
        if branch.head is not None:
            modules = branch.head.modules + [module]
        else:
            modules = [module]
        branch.append_workflow(
            modules=modules,
            action=ACTION_INSERT,
            command=command
        )
        self.assertEqual(len(branch.get_history()), (i + 1))
    # Delete the file for the third module to simulate an error condition
    # in which a file wasn't written properly
    missing_path = branch.head.modules[2].module_path
    os.remove(missing_path)
    self.assertFalse(os.path.isfile(missing_path))
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_default_branch()
    self.assertTrue(branch.head.get_state().is_error)
    self.assertTrue(branch.head.modules[2].is_error)
def test_branch_cache(self):
    """Test how workflows enter and leave the branch's workflow cache.

    Expectations encoded by this test:

    - the workflow returned by the first append is not in the cache;
    - each of the next DEFAULT_CACHE_SIZE appends grows the cache by one
      entry and that first workflow is then among the cached entries;
    - one further append keeps the cache at DEFAULT_CACHE_SIZE entries and
      drops the first workflow from it;
    - a freshly loaded branch starts with an empty cache, get_workflow()
      puts the requested workflow into the cache, and fetching every other
      workflow in the history pushes it out again.
    """

    def new_module():
        # Build a successful 'print 2+2' module in the viztrail's folder
        return OSModuleHandle.create_module(
            command=command,
            external_form='print 2+2',
            state=MODULE_SUCCESS,
            timestamp=ModuleTimestamp(
                created_at=get_current_time(),
                started_at=get_current_time(),
                finished_at=get_current_time()
            ),
            outputs=ModuleOutputs(stdout=[TextOutput('4')]),
            provenance=ModuleProvenance(),
            module_folder=vt.modules_folder,
            object_store=vt.object_store
        )

    def cached_ids(branch_handle):
        # Identifiers of all workflows currently held in the branch cache
        return [w.identifier for w in branch_handle.cache]

    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(
        identifier='ABC',
        properties={},
        base_path=base_path
    )
    branch = vt.get_default_branch()
    command = python_cell(source='print 2+2')
    wf = branch.append_workflow(
        modules=[new_module()],
        action=ACTION_INSERT,
        command=command
    )
    self.assertNotIn(wf.identifier, cached_ids(branch))
    # Fill the cache up to its capacity
    for i in range(DEFAULT_CACHE_SIZE):
        module = new_module()
        branch.append_workflow(
            modules=branch.head.modules + [module],
            action=ACTION_INSERT,
            command=command
        )
        self.assertEqual(len(branch.cache), (i + 1))
        self.assertIn(wf.identifier, cached_ids(branch))
    # One more append exceeds the capacity and evicts the first workflow
    module = new_module()
    branch.append_workflow(
        modules=branch.head.modules + [module],
        action=ACTION_INSERT,
        command=command
    )
    self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
    self.assertNotIn(wf.identifier, cached_ids(branch))
    # A reloaded branch starts with an empty cache
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_default_branch()
    self.assertEqual(len(branch.cache), 0)
    self.assertNotIn(wf.identifier, cached_ids(branch))
    # Reading a workflow puts it into the cache ...
    branch.get_workflow(wf.identifier)
    self.assertIn(wf.identifier, cached_ids(branch))
    # ... and reading all other workflows pushes it out again
    for wf_desc in branch.get_history():
        if wf_desc.identifier != wf.identifier:
            branch.get_workflow(wf_desc.identifier)
    self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
    self.assertNotIn(wf.identifier, cached_ids(branch))