def test_workflow(self):
     """Run a workflow, insert a module at its start, and verify the result.

     A dataset is loaded and a Python cell is run on it. A second Python
     cell (CREATE_DATASET_PY) is then inserted before the first module.
     The test checks that all modules succeed, which dataset names are
     visible along the workflow, and the first two rows of both datasets.
     """
     # Create a new project and get the identifier of its default branch
     project = self.engine.projects.create_project()
     branch_id = project.viztrail.default_branch.identifier
     fh = project.filestore.upload_file(CSV_FILE)
     cmd = load_dataset(dataset_name=DATASET_PEOPLE,
                        file={
                            pckg.FILE_ID: fh.identifier,
                            pckg.FILE_NAME: os.path.basename(CSV_FILE)
                        },
                        infer_types=True)
     self.engine.append_workflow_module(project_id=project.identifier,
                                        branch_id=branch_id,
                                        command=cmd)
     cmd = python_cell(PY_ADD_ONE)
     self.engine.append_workflow_module(project_id=project.identifier,
                                        branch_id=branch_id,
                                        command=cmd)
     # Wait for execution to finish *before* reading the branch head so the
     # inspected workflow is the one that was actually completed.
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     wf = project.viztrail.default_branch.head
     for m in wf.modules:
         print(m)
         self.assertTrue(m.is_success)
     # Insert a cell in front of the current first module
     cmd = python_cell(CREATE_DATASET_PY)
     self.engine.insert_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         before_module_id=wf.modules[0].identifier,
         command=cmd)
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     wf = project.viztrail.default_branch.head
     for m in wf.modules:
         print(m)
         self.assertTrue(m.is_success)
     # The inserted module writes only the friends dataset
     datasets = wf.modules[0].provenance.write
     self.assertIn(DATASET_FRIENDS, datasets)
     self.assertNotIn(DATASET_PEOPLE, datasets)
     # Every later module is expected to see both datasets
     for m in wf.modules[1:]:
         datasets = m.provenance.get_database_state(datasets)
         self.assertIn(DATASET_FRIENDS, datasets)
         self.assertIn(DATASET_PEOPLE, datasets)
     ds = project.datastore.get_dataset(datasets[DATASET_PEOPLE].identifier)
     rows = ds.fetch_rows()
     self.assertEqual(rows[0].values, ['Alice', 24])
     self.assertEqual(rows[1].values, ['Bob', 32])
     ds = project.datastore.get_dataset(
         datasets[DATASET_FRIENDS].identifier)
     rows = ds.fetch_rows()
     self.assertEqual(rows[0].values, ['Yonder', 23])
     self.assertEqual(rows[1].values, ['Zoe', 34])
 def test_skip_modules(self):
     """Test replacing a module in a workflow where some cells do not
     require to be re-executed because they access a different dataset.

     Two datasets are loaded and ten Python cells are appended that
     alternate between PY_ADD_ONE and PY_ADD_SECOND. After replacing the
     module at index 4, every module from that index on is expected to
     reference a new DATASET_NAME dataset while the SECOND_DATASET_NAME
     dataset identifier stays the same.
     """
     project = self.engine.projects.create_project()
     branch_id = project.get_default_branch().identifier
     fh1 = project.filestore.upload_file(CSV_FILE)
     fh2 = project.filestore.upload_file(CSV_FILE)
     self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         command=load_dataset(dataset_name=DATASET_NAME,
                              file={pckg.FILE_ID: fh1.identifier}))
     self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         command=load_dataset(dataset_name=SECOND_DATASET_NAME,
                              file={pckg.FILE_ID: fh2.identifier}))
     # Even positions run PY_ADD_ONE, odd positions run PY_ADD_SECOND
     for i in range(10):
         source = PY_ADD_ONE if i % 2 == 0 else PY_ADD_SECOND
         self.engine.append_workflow_module(project_id=project.identifier,
                                            branch_id=branch_id,
                                            command=python_cell(source))
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     wf = project.viztrail.default_branch.head
     self.assertTrue(wf.get_state().is_success)
     # Remember the datasets of all modules from index 4 on for comparison
     datasets = [module.datasets for module in wf.modules[4:]]
     self.assert_module_count_is(project, 12)
     # Replace a module that updates the first dataset. All modules that
     # access the second dataset should remain unchanged.
     cmd = python_cell(PY_ADD_TEN)
     self.engine.replace_workflow_module(project_id=project.identifier,
                                         branch_id=branch_id,
                                         module_id=wf.modules[4].identifier,
                                         command=cmd)
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     wf = project.viztrail.default_branch.head
     self.assertTrue(wf.get_state().is_success)
     modules = wf.modules[4:]
     # Guard against zip() silently dropping unmatched entries
     self.assertEqual(len(modules), len(datasets))
     for previous, module in zip(datasets, modules):
         self.assertNotEqual(previous[DATASET_NAME].identifier,
                             module.datasets[DATASET_NAME].identifier)
         self.assertEqual(previous[SECOND_DATASET_NAME].identifier,
                          module.datasets[SECOND_DATASET_NAME].identifier)
 def test_replace(self):
     """Test replacing a module.

     Covers three cases: replacing a module in the middle of a completed
     workflow, replacing a module in an unknown branch (expects None), and
     replacing the first module (expects an error state for the first
     module and canceled state for all following modules).
     """
     project = self.engine.projects.create_project()
     branch_id = self.create_workflow(project)
     wf = project.viztrail.default_branch.head
     # Keep track of datasets in the completed workflow
     datasets = [m.datasets[DATASET_NAME].identifier for m in wf.modules]
     # Replace a module in the middle of the workflow
     cmd = python_cell(PY_ADD_TEN)
     result = self.engine.replace_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         module_id=wf.modules[5].identifier,
         command=cmd)
     self.assertEqual(len(result), 6)
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     self.assert_module_count_is(project, 11)
     self.assert_value_is(project, 42)
     wf = project.viztrail.default_branch.head
     # Modules before the replaced one keep their dataset ...
     for expected, module in zip(datasets[:5], wf.modules[:5]):
         self.assertEqual(expected,
                          module.datasets[DATASET_NAME].identifier)
     # ... while the replaced module and all later ones reference a dataset
     # that did not exist in the original workflow
     for module in wf.modules[5:]:
         self.assertNotIn(module.datasets[DATASET_NAME].identifier,
                          datasets)
     # Ensure that None is returned when attempting to replace a module in
     # an unknown branch
     result = self.engine.replace_workflow_module(
         project_id=project.identifier,
         branch_id='null',
         module_id=wf.modules[0].identifier,
         command=python_cell('print 2+2'))
     self.assertIsNone(result)
     # Replace at the start will leave all modules in error or canceled state
     result = self.engine.replace_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         module_id=wf.modules[0].identifier,
         command=cmd)
     self.assertEqual(len(result), 11)
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     self.assert_module_count_is(project, 11)
     wf = project.viztrail.default_branch.head
     self.assertTrue(wf.modules[0].is_error)
     for module in wf.modules[1:]:
         self.assertTrue(module.is_canceled)
Exemplo n.º 4
0
 def create_workflow(self, project):
     """Create a completed workflow by loading the data file and updating the
     age value of the first row ten times.

     Appends one load-dataset module followed by ten PY_ADD_ONE cells to
     the default branch of the given project, waits until the branch head
     is no longer active, and asserts that every module succeeded and
     wrote DATASET_NAME.

     Returns the identifier of the project's default branch.
     """
     branch_id = project.viztrail.default_branch.identifier
     fh = project.filestore.upload_file(CSV_FILE)
     cmd = load_dataset(
         dataset_name=DATASET_NAME,
         file={pckg.FILE_ID: fh.identifier}
     )
     self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         command=cmd
     )
     for _ in range(10):
         cmd = python_cell(PY_ADD_ONE)
         self.engine.append_workflow_module(
             project_id=project.identifier,
             branch_id=branch_id,
             command=cmd
         )
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     for module in project.viztrail.default_branch.head.modules:
         # Show the module outputs to help diagnose a failing module
         if not module.is_success:
             print(module.outputs)
         self.assertTrue(module.is_success)
         self.assertIn(DATASET_NAME, module.provenance.write)
     return branch_id
Exemplo n.º 5
0
 def test_simple_script(self):
     """Test running the simple python script.

     The cell prints the value of 2+2; the test expects a successful
     result whose first stdout entry is '4'.
     """
     # Use the function-call form of print so the cell source is valid
     # under both Python 2 and Python 3.
     cmd = python_cell(source='print(2+2)', validate=True)
     result = PyCellTaskProcessor().compute(command_id=cmd.command_id,
                                            arguments=cmd.arguments,
                                            context=TaskContext(
                                                datastore=self.datastore,
                                                filestore=self.filestore,
                                                datasets=dict()))
     self.assertTrue(result.is_success)
     self.assertEqual(result.outputs.stdout[0].value, '4')
 def test_python_cell(self):
     """Check argument validation for the python cell command."""

     def build_command(arguments):
         # Create a python-code module command from serialized arguments
         return ModuleCommand(package_id=pycell.PACKAGE_PYTHON,
                              command_id=pycell.PYTHON_CODE,
                              arguments=arguments,
                              packages=PACKAGES)

     # A string source passes validation
     python_cell(source='ABC', validate=True)
     # A source value of an invalid data type is rejected
     with self.assertRaises(ValueError):
         python_cell(source=[], validate=True)
     # The list serialization of the arguments can be used to create a
     # valid command instance
     serialized = python_cell(source='ABC', validate=True).arguments.to_list()
     build_command(serialized)
     # Validation fails once the only mandatory element is removed
     del serialized[0]
     with self.assertRaises(ValueError):
         build_command(serialized)
     # Validation also fails if an unknown argument is present
     serialized = python_cell(source='ABC', validate=True).arguments.to_list()
     serialized.append(ARG(id='someUnknownLabel', value=''))
     with self.assertRaises(ValueError):
         build_command(serialized)
Exemplo n.º 7
0
 def test_unknown_dataset_script(self):
     """Run scripts that access a dataset which is not in the context.

     The first script is expected to fail with a single stderr entry and
     empty provenance. The second script catches the error, so it is
     expected to succeed and to record the accessed name 'employees' in
     its read provenance with a None value.
     """
     fh = self.filestore.upload_file(CSV_FILE)
     people = self.datastore.load_dataset(fh)

     def run(source):
         # Execute the given source with 'people' as the only artifact
         cell = python_cell(source=source, validate=True)
         return PyCellTaskProcessor().compute(
             command_id=cell.command_id,
             arguments=cell.arguments,
             context=TaskContext(datastore=self.datastore,
                                 filestore=self.filestore,
                                 project_id=6,
                                 artifacts={'people': people}))

     # Uncaught access to the unknown dataset
     failed = run(PRINT_UNKNOWN_DATASET_PY)
     self.assertFalse(failed.is_success)
     self.assertTrue(failed.provenance.read == {})
     self.assertTrue(failed.provenance.write == {})
     self.assertEqual(len(failed.outputs.stdout), 0)
     self.assertEqual(len(failed.outputs.stderr), 1)
     # Same access, but the script handles the error itself
     caught = run(PRINT_UNKNOWN_DATASET_PY_WITH_TRY_CATCH)
     self.assertTrue(caught.is_success)
     read = caught.provenance.read
     write = caught.provenance.write
     self.assertIsNotNone(read)
     self.assertIsNotNone(write)
     self.assertEqual(len(read), 1)
     self.assertEqual(len(write), 0)
     self.assertTrue('employees' in read)
     self.assertIsNone(read['employees'])
     self.assertEqual(len(caught.outputs.stdout), 1)
     self.assertEqual(len(caught.outputs.stderr), 0)
 def test_execute(self):
     """Check the workflow built by create_workflow and appending to an
     unknown branch.
     """
     project = self.engine.projects.create_project()
     self.create_workflow(project)
     head = project.viztrail.default_branch.head
     self.assertFalse(head.is_active)
     self.assert_module_count_is(project, 11)
     self.assert_value_is(project, 33)
     # Appending to a branch that does not exist is expected to return None
     outcome = self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id='null',
         command=python_cell('print 2+2')
     )
     self.assertIsNone(outcome)
Exemplo n.º 9
0
 def test_execute(self):
     """Run one python cell through the asynchronous backend and check the
     task id, state and output reported to the controller.
     """
     artifacts = {}
     cell = pycell.python_cell(source='print(2+2)', validate=True)
     controller = FakeWorkflowController()
     task = TaskHandle(task_id='000',
                       project_id=self.PROJECT_ID,
                       controller=controller)
     self.backend.execute_async(task=task,
                                command=cell,
                                artifacts=artifacts)
     # Give the backend time to finish before inspecting the controller
     time.sleep(3)
     self.assertEqual(controller.task_id, '000')
     self.assertEqual(controller.state, 'SUCCESS')
     self.assertEqual(controller.outputs.stdout[0].value, '4')
 def test_completed_append(self):
     """Append completed workflows to a branch and verify the head, both
     directly and after reloading the viztrail from disk.
     """

     def assert_head_after_delete(workflow):
         # Expected head: nine modules, created by a delete action
         self.assertEqual(len(workflow.modules), 9)
         self.assertEqual(workflow.descriptor.identifier, '0000000A')
         self.assertEqual(workflow.descriptor.action, ACTION_DELETE)
         self.assertEqual(workflow.descriptor.package_id, PACKAGE_PYTHON)
         self.assertEqual(workflow.descriptor.command_id, PYTHON_CODE)

     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties=None,
                                           base_path=base_path)
     branch = vt.get_default_branch()
     # Build up ten workflows, each extending the previous head by one module
     for i in range(10):
         ts = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         command = python_cell(source=source)
         module = OSModuleHandle.create_module(
             command=command,
             external_form='print ' + str(i) + '+' + str(i),
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(created_at=ts,
                                       started_at=ts,
                                       finished_at=ts),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)
         if branch.head is None:
             modules = [module]
         else:
             modules = branch.head.modules + [module]
         branch.append_workflow(modules=modules,
                                action=ACTION_INSERT,
                                command=command)
     # Append a workflow that drops the last module of the current head
     head_modules = branch.get_head().modules
     wf = branch.append_workflow(modules=head_modules[:-1],
                                 action=ACTION_DELETE,
                                 command=head_modules[-1].command)
     assert_head_after_delete(wf)
     # Reload from disk and check history and head again
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     history = branch.get_history()
     self.assertEqual(len(history), 11)
     assert_head_after_delete(branch.get_head())
Exemplo n.º 11
0
 def test_running(self):
     """Switch a module from pending to running and check what is persisted.

     The test expects that a running module has a start timestamp, no
     datasets and no outputs, and that its provenance members are not
     None, both in memory and after reloading the module.
     """

     def assert_running(handle):
         # Expected state of a module right after set_running()
         self.assertTrue(handle.is_running)
         self.assertIsNotNone(handle.timestamp.started_at)
         self.assertEqual(len(handle.datasets), 0)
         self.assertEqual(len(handle.outputs.stderr), 0)
         self.assertEqual(len(handle.outputs.stdout), 0)
         self.assertIsNotNone(handle.provenance.read)
         self.assertIsNotNone(handle.provenance.write)
         self.assertIsNotNone(handle.provenance.resources)

     def reload(handle):
         # Read the module back from where it was stored
         return OSModuleHandle.load_module(identifier=handle.identifier,
                                           module_path=handle.module_path)

     # Create the original module in pending state
     module = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=ModuleTimestamp(),
         datasets={'DS1': DS1},
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={'DS1': DatasetDescriptor(identifier='ID2')},
             resources={'fileid': '0123456789'}))
     self.assertTrue(module.is_pending)
     module.set_running(external_form='TEST MODULE')
     assert_running(module)
     # The reloaded module must show the same changes
     module = reload(module)
     assert_running(module)
     # Set running with all optional parameters
     module.set_running(started_at=module.timestamp.created_at,
                        external_form='Some form')
     self.assertEqual(module.timestamp.started_at,
                      module.timestamp.created_at)
     self.assertEqual(module.external_form, 'Some form')
     module = reload(module)
     self.assertEqual(module.timestamp.started_at,
                      module.timestamp.created_at)
     self.assertEqual(module.external_form, 'Some form')
Exemplo n.º 12
0
 def test_cancel(self) -> None:
     """Cancel a running task and check that nothing is reported.

     A python cell that sleeps for five seconds is started, the task is
     canceled after one second, and after a further wait the controller
     is expected to have neither a task id nor a state.
     """
     artifacts: Dict[str, ArtifactDescriptor] = {}
     sleeper = pycell.python_cell(source='import time\ntime.sleep(5)',
                                  validate=True)
     controller = FakeWorkflowController()
     task = TaskHandle(task_id='000',
                       project_id=self.PROJECT_ID,
                       controller=controller)
     self.backend.execute_async(task=task,
                                command=sleeper,
                                artifacts=artifacts)
     # Let the task start before canceling it
     time.sleep(1)
     self.backend.cancel_task('000')
     # Wait past the cell's own sleep time before checking the controller
     time.sleep(5)
     self.assertIsNone(controller.task_id)
     self.assertIsNone(controller.state)
Exemplo n.º 13
0
 def test_create_dataset_script(self):
     """Run a script that creates a dataset and check provenance and
     output of the result.
     """
     cell = python_cell(source=CREATE_DATASET_PY, validate=True)
     processor = PyCellTaskProcessor()
     context = TaskContext(datastore=self.datastore,
                           filestore=self.filestore)
     result = processor.compute(command_id=cell.command_id,
                                arguments=cell.arguments,
                                context=context)
     self.assertTrue(result.is_success)
     # Nothing is read; exactly one dataset named 'people' is written
     read = result.provenance.read
     write = result.provenance.write
     self.assertIsNotNone(read)
     self.assertIsNotNone(write)
     self.assertEqual(len(read), 0)
     self.assertEqual(len(write), 1)
     self.assertTrue('people' in write)
     self.assertIsNotNone(write['people'])
     # A single stdout entry and no errors
     outputs = result.outputs
     self.assertEqual(len(outputs.stdout), 1)
     self.assertEqual(len(outputs.stderr), 0)
     self.assertEqual(outputs.stdout[0].value, 'Alice\nBob')
 def test_multi_append(self):
     """Append ten modules one at a time and verify the branch history,
     including after reloading the viztrail from disk.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties=None,
                                           base_path=base_path)
     branch = vt.get_default_branch()
     # Each iteration appends a workflow with one more module than before
     for i in range(10):
         ts = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         command = python_cell(source=source)
         module = OSModuleHandle.create_module(
             command=command,
             external_form='print ' + str(i) + '+' + str(i),
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(created_at=ts,
                                       started_at=ts,
                                       finished_at=ts),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)
         if branch.head is None:
             modules = [module]
         else:
             modules = branch.head.modules + [module]
         branch.append_workflow(modules=modules,
                                action=ACTION_INSERT,
                                command=command)
         self.assertEqual(len(branch.get_history()), (i + 1))
     # Reload and check every workflow in the history
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     history = branch.get_history()
     self.assertEqual(len(history), 10)
     for size, entry in enumerate(history, start=1):
         wf = branch.get_workflow(entry.identifier)
         self.assertEqual(len(wf.modules), size)
         for m, module in enumerate(wf.modules):
             self.assertEqual(module.external_form,
                              'print ' + str(m) + '+' + str(m))
             self.assertEqual(module.outputs.stdout[-1].value, str(m + m))
 def create_workflow(self, project):
     """Build a workflow of one load-dataset module followed by ten
     PY_ADD_ONE cells on the project's default branch and wait until it
     is no longer active.

     Returns the identifier of the default branch.
     """
     default_branch = project.viztrail.default_branch
     branch_id = default_branch.identifier
     fh = project.filestore.upload_file(CSV_FILE)
     # Load the data file as the first module
     self.engine.append_workflow_module(
         project_id=project.identifier,
         branch_id=branch_id,
         command=load_dataset(dataset_name=DATASET_NAME,
                              file={pckg.FILE_ID: fh.identifier}))
     # Append ten cells, each created as a separate command
     for _ in range(10):
         self.engine.append_workflow_module(
             project_id=project.identifier,
             branch_id=branch_id,
             command=python_cell(PY_ADD_ONE))
     # Poll until the head workflow has finished executing
     while project.viztrail.default_branch.head.is_active:
         time.sleep(0.1)
     return branch_id