Exemplo n.º 1
0
    def from_dict(obj):
        """Build a workflow module resource from the dictionary serialization
        that is returned by the vizier web service.

        Parameters
        ----------
        obj: dict
            Dictionary serialization of a workflow module handle

        Returns
        -------
        vizier.api.client.resources.module.ModuleResource
        """
        # Collect output values: text and HTML entries from STDOUT first,
        # followed by every entry from STDERR.
        outputs = list()
        if 'outputs' in obj:
            streams = obj['outputs']
            outputs.extend(
                entry['value'] for entry in streams['stdout']
                if entry['type'] in [OUTPUT_TEXT, OUTPUT_HTML]
            )
            outputs.extend(entry['value'] for entry in streams['stderr'])
        # The creation time is mandatory; start and finish times are only set
        # when they are present in the serialization.
        ts_doc = obj['timestamps']
        timestamp = ModuleTimestamp(created_at=to_datetime(ts_doc['createdAt']))
        optional_times = (
            ('startedAt', 'started_at'),
            ('finishedAt', 'finished_at'),
        )
        for key, attr in optional_times:
            if key in ts_doc:
                setattr(timestamp, attr, to_datetime(ts_doc[key]))
        # Map dataset and data object names to their identifiers
        datasets = (
            {ds['name']: ds['id'] for ds in obj['datasets']}
            if 'datasets' in obj else dict()
        )
        dataobjects = (
            {dobj['name']: dobj['id'] for dobj in obj['dataobjects']}
            if 'dataobjects' in obj else dict()
        )
        # Index chart handles by their name
        charts = dict()
        if 'charts' in obj:
            for handle in map(ChartHandle.from_dict, obj['charts']):
                charts[handle.name] = handle
        return ModuleResource(
            identifier=obj['id'],
            state=to_external_form(obj['state']),
            external_form=obj['text'],
            outputs=outputs,
            datasets=datasets,
            dataobjects=dataobjects,
            charts=charts,
            timestamp=timestamp,
            links=deserialize.HATEOAS(links=obj['links'])
        )
 def test_single_append(self):
     """Append one module to an empty viztrail branch and verify that the
     workflow and the module are persisted and can be loaded again.
     """
     source = 'print 2+2'
     repo_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(repo_dir)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties={},
         base_path=repo_dir
     )
     default_branch = viztrail.get_default_branch()
     cell = python_cell(source=source)
     now = get_current_time()
     handle = OSModuleHandle.create_module(
         command=cell,
         external_form=source,
         state=MODULE_SUCCESS,
         outputs=ModuleOutputs(stdout=[TextOutput('4')]),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(
             created_at=now,
             started_at=now,
             finished_at=now
         ),
         module_folder=viztrail.modules_folder,
         object_store=viztrail.object_store
     )
     workflow = default_branch.append_workflow(
         modules=[handle],
         action=ACTION_INSERT,
         command=cell
     )
     # One file is expected for the workflow handle ...
     workflow_file = os.path.join(
         default_branch.base_path,
         workflow.identifier
     )
     self.assertTrue(os.path.isfile(workflow_file))
     # ... and one file for the newly created module.
     module_file = os.path.join(workflow.modules[-1].module_path)
     self.assertTrue(os.path.isfile(module_file))
     # Reload the viztrail and inspect the module at the branch head
     viztrail = OSViztrailHandle.load_viztrail(repo_dir)
     head = viztrail.get_default_branch().get_head()
     loaded = head.modules[-1]
     self.assertEqual(loaded.external_form, source)
     self.assertEqual(loaded.outputs.stdout[-1].value, '4')
 def test_load_with_dataset_delete(self):
     """Load a workflow in which every module writes a new dataset and
     (apart from the first module) deletes the dataset that was written by
     its predecessor.
     """
     repo_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(repo_dir)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties={},
         base_path=repo_dir
     )
     branch = viztrail.get_default_branch()
     # Append five modules. Module i writes DS<i> (with i columns) and
     # deletes DS<i-1>.
     for i in range(5):
         now = get_current_time()
         dropped = ['DS' + str(i - 1)] if i > 0 else list()
         source = 'print ' + str(i) + '+' + str(i)
         cell = python_cell(source=source)
         stdout = ModuleOutputs(stdout=[TextOutput(str(i + i))])
         ds_name = 'DS' + str(i)
         descriptor = DatasetDescriptor(
             identifier=str(i),
             name=ds_name,
             columns=[
                 DatasetColumn(identifier=j, name=str(j))
                 for j in range(i)
             ],
         )
         handle = OSModuleHandle.create_module(
             command=cell,
             external_form=source,
             state=MODULE_SUCCESS,
             outputs=stdout,
             provenance=ModuleProvenance(
                 write={ds_name: descriptor},
                 delete=dropped
             ),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=viztrail.modules_folder,
             object_store=viztrail.object_store
         )
         # Extend the current head (if any) by the new module
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=cell
         )
     viztrail = OSViztrailHandle.load_viztrail(repo_dir)
     workflow = viztrail.get_default_branch().get_head()
     self.assertEqual(len(workflow.modules), 5)
     # Replaying the provenance must leave exactly one dataset after each
     # module, namely the one written by that module.
     state = {}
     for i in range(5):
         state = workflow.modules[i].provenance.get_database_state(state)
         self.assertEqual(len(state), 1)
         expected_name = 'DS' + str(i)
         self.assertTrue(expected_name in state)
         self.assertEqual(len(state[expected_name].columns), i)
Exemplo n.º 4
0
 def test_timestamps(self):
     """Write modules with different timestamp values and verify that the
     values survive a round trip through the object store.
     """

     def reload(handle):
         # Read the persisted version of the given module handle
         return OSModuleHandle.load_module(
             identifier=handle.identifier,
             module_path=handle.module_path
         )

     original = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR
     )
     loaded = reload(original)
     # Set started_at and make sure created_at is not affected
     created_at = loaded.timestamp.created_at
     started_at = to_datetime('2018-11-26T13:00:00.000000')
     loaded.timestamp.started_at = started_at
     loaded.write_module()
     loaded = reload(original)
     self.assertEqual(loaded.timestamp.created_at, created_at)
     self.assertEqual(loaded.timestamp.started_at, started_at)
     # Overwrite created_at and set finished_at
     finished_at = to_datetime('2018-11-26T13:00:00.000010')
     loaded.timestamp.created_at = finished_at
     loaded.timestamp.finished_at = finished_at
     loaded.write_module()
     loaded = reload(original)
     self.assertEqual(loaded.timestamp.created_at, finished_at)
     self.assertEqual(loaded.timestamp.started_at, started_at)
     self.assertEqual(loaded.timestamp.finished_at, finished_at)
     # Create a second module that has no finished_at timestamp
     original = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(
             created_at=created_at,
             started_at=started_at
         ),
         module_folder=MODULE_DIR
     )
     loaded = reload(original)
     self.assertEqual(loaded.timestamp.created_at, created_at)
     self.assertEqual(loaded.timestamp.started_at, started_at)
     self.assertIsNone(loaded.timestamp.finished_at)
 def test_load_active(self):
     """Load workflows that contain modules in an active state and verify
     that these modules are reported as canceled after loading.
     """
     repo_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(repo_dir)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=repo_dir
     )
     branch = viztrail.get_default_branch()
     # Append five modules, one workflow version per module
     for i in range(5):
         now = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         cell = python_cell(source=source)
         handle = OSModuleHandle.create_module(
             command=cell,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=viztrail.modules_folder,
             object_store=viztrail.object_store
         )
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=cell
         )
         self.assertEqual(len(branch.get_history()), (i + 1))
     # Hack to simulate active modules on disk: switch the last two modules
     # of the branch head to the running state and persist them.
     for position in (-2, -1):
         active = branch.get_head().modules[position]
         active.state = MODULE_RUNNING
         active.write_module()
     viztrail = OSViztrailHandle.load_viztrail(repo_dir)
     branch = viztrail.get_default_branch()
     for position in (0, 1, 2):
         self.assertTrue(branch.get_head().modules[position].is_success)
     for position in (3, 4):
         self.assertTrue(branch.get_head().modules[position].is_canceled)
     # Switch the second module (the last module of the second workflow
     # version) to the running state as well.
     active = branch.get_head().modules[1]
     active.state = MODULE_RUNNING
     active.write_module()
     viztrail = OSViztrailHandle.load_viztrail(repo_dir)
     branch = viztrail.get_default_branch()
     second_version = branch.get_history()[1]
     workflow = branch.get_workflow(second_version.identifier)
     self.assertTrue(workflow.modules[0].is_success)
     self.assertTrue(workflow.modules[1].is_canceled)
Exemplo n.º 6
0
 def test_datasets(self):
     """Write modules that carry dataset information and verify what is
     available after reading them back.
     """

     def create_and_load(state):
         # Persist a module in the given state and read it back
         created = OSModuleHandle.create_module(
             command=python_cell(source='print 2+2'),
             external_form='TEST MODULE',
             state=state,
             outputs=ModuleOutputs(),
             provenance=ModuleProvenance(write=DATASETS),
             timestamp=ModuleTimestamp(),
             module_folder=MODULE_DIR,
             datasets=DATASETS
         )
         return OSModuleHandle.load_module(
             identifier=created.identifier,
             module_path=created.module_path,
             prev_state=dict()
         )

     # A pending module is expected to expose no datasets after loading
     pending = create_and_load(MODULE_PENDING)
     self.assertEqual(len(pending.datasets), 0)
     # A successful module is expected to expose both datasets
     loaded = create_and_load(MODULE_SUCCESS)
     self.assertEqual(len(loaded.datasets), 2)
     self.assertEqual(loaded.datasets['DS1'].identifier, 'ID1')
     self.assertEqual(len(loaded.datasets['DS1'].columns), 0)
     self.assertEqual(loaded.datasets['DS1'].row_count, 0)
     second = loaded.datasets['DS2']
     self.assertEqual(second.identifier, 'ID2')
     self.assertEqual(len(second.columns), 2)
     first_col = second.columns[0]
     self.assertEqual(first_col.identifier, 0)
     self.assertEqual(first_col.name, 'ABC')
     self.assertEqual(first_col.data_type, 'int')
     second_col = second.columns[1]
     self.assertEqual(second_col.identifier, 1)
     self.assertEqual(second_col.name, 'xyz')
     self.assertEqual(second_col.data_type, 'real')
     self.assertEqual(second.row_count, 100)
Exemplo n.º 7
0
    def __init__(self,
                 command: ModuleCommand,
                 external_form: Optional[str],
                 identifier: Optional[str] = None,
                 state: int = MODULE_PENDING,
                 timestamp: Optional[ModuleTimestamp] = None,
                 outputs: Optional[ModuleOutputs] = None,
                 provenance: Optional[ModuleProvenance] = None):
        """Initialize the module handle. For new modules, datasets and outputs
        are initially empty.

        The timestamp, outputs, and provenance arguments default to None and
        a fresh object is created for each handle when they are omitted.
        Default values in the signature are evaluated only once, when the
        function is defined, so using object instances as defaults would make
        all handles share the same (mutable) default objects.

        Parameters
        ----------
        command : vizier.viztrail.command.ModuleCommand
            Specification of the module (i.e., package, name, and arguments)
        external_form: string
            Printable representation of module command
        identifier : string, optional
            Unique module identifier
        state: int
            Module state (one of PENDING, RUNNING, CANCELED, ERROR, SUCCESS)
        timestamp: vizier.viztrail.module.timestamp.ModuleTimestamp, optional
            Module timestamp
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Module output streams STDOUT and STDERR
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written by
            previous execution of the module.
        """
        # Callers may pass None explicitly for the state; fall back to pending.
        super(ModuleHandle, self).__init__(
            state=state if state is not None else MODULE_PENDING)
        self.identifier = identifier
        self.command = command
        self.external_form = external_form
        # Create per-instance defaults for all omitted components
        self.outputs = outputs if outputs is not None else ModuleOutputs()
        self.provenance = (
            provenance if provenance is not None else ModuleProvenance()
        )
        self.timestamp = (
            timestamp if timestamp is not None else ModuleTimestamp()
        )
 def test_init(self):
     """Verify the initialization rules of the module timestamp object."""
     # Without arguments only the created_at timestamp is set
     stamp = ModuleTimestamp()
     self.assertIsNotNone(stamp.created_at)
     self.assertIsNone(stamp.started_at)
     self.assertIsNone(stamp.finished_at)
     created_at = stamp.created_at
     # Given created_at and started_at values are taken over as they are
     stamp = ModuleTimestamp(created_at=created_at, started_at=created_at)
     self.assertIsNotNone(stamp.created_at)
     self.assertIsNotNone(stamp.started_at)
     self.assertIsNone(stamp.finished_at)
     self.assertEqual(stamp.created_at, created_at)
     self.assertEqual(stamp.started_at, created_at)
     # A ValueError is expected whenever created_at is missing while at
     # least one of the other two timestamps is given
     invalid_arguments = (
         {'started_at': created_at},
         {'finished_at': created_at},
         {'started_at': created_at, 'finished_at': created_at},
     )
     for kwargs in invalid_arguments:
         with self.assertRaises(ValueError):
             ModuleTimestamp(**kwargs)
 def test_completed_append(self):
     """Append a completed workflow (a delete of the last module) to a
     branch and verify the resulting head before and after reloading.
     """

     def assert_delete_head(workflow):
         # The head is expected to be the 11th version with nine modules
         self.assertEqual(len(workflow.modules), 9)
         self.assertEqual(workflow.descriptor.identifier, '0000000A')
         self.assertEqual(workflow.descriptor.action, ACTION_DELETE)
         self.assertEqual(workflow.descriptor.package_id, PACKAGE_PYTHON)
         self.assertEqual(workflow.descriptor.command_id, PYTHON_CODE)

     repo_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(repo_dir)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=repo_dir
     )
     branch = viztrail.get_default_branch()
     # Build up a history of ten workflow versions
     for i in range(10):
         now = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         cell = python_cell(source=source)
         handle = OSModuleHandle.create_module(
             command=cell,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=viztrail.modules_folder,
             object_store=viztrail.object_store
         )
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=cell
         )
     # Append a version that drops the last module of the current head
     current = branch.get_head().modules
     appended = branch.append_workflow(
         modules=current[:-1],
         action=ACTION_DELETE,
         command=current[-1].command
     )
     assert_delete_head(appended)
     # The reloaded branch must show the same head and eleven versions
     viztrail = OSViztrailHandle.load_viztrail(repo_dir)
     branch = viztrail.get_default_branch()
     versions = branch.get_history()
     self.assertEqual(len(versions), 11)
     assert_delete_head(branch.get_head())
Exemplo n.º 10
0
 def test_running(self):
     """Switch a module from pending to running and verify the in-memory
     and the persisted module state.
     """

     def reload(handle):
         # Read the persisted version of the given module handle
         return OSModuleHandle.load_module(
             identifier=handle.identifier,
             module_path=handle.module_path
         )

     def assert_running(handle):
         # A running module has a start time, no datasets and no outputs,
         # but still carries provenance information
         self.assertTrue(handle.is_running)
         self.assertIsNotNone(handle.timestamp.started_at)
         self.assertEqual(len(handle.datasets), 0)
         self.assertEqual(len(handle.outputs.stderr), 0)
         self.assertEqual(len(handle.outputs.stdout), 0)
         self.assertIsNotNone(handle.provenance.read)
         self.assertIsNotNone(handle.provenance.write)
         self.assertIsNotNone(handle.provenance.resources)

     # Create the original (pending) module
     handle = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=ModuleTimestamp(),
         datasets={'DS1': DS1},
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={'DS1': DatasetDescriptor(identifier='ID2')},
             resources={'fileid': '0123456789'}
         )
     )
     self.assertTrue(handle.is_pending)
     handle.set_running(external_form='TEST MODULE')
     assert_running(handle)
     # The changes must have been materialized in the object store
     handle = reload(handle)
     assert_running(handle)
     # Set running again, this time with all optional parameters
     handle.set_running(
         started_at=handle.timestamp.created_at,
         external_form='Some form'
     )
     self.assertEqual(
         handle.timestamp.started_at,
         handle.timestamp.created_at
     )
     self.assertEqual(handle.external_form, 'Some form')
     handle = reload(handle)
     self.assertEqual(
         handle.timestamp.started_at,
         handle.timestamp.created_at
     )
     self.assertEqual(handle.external_form, 'Some form')
 def test_state(self):
     """Verify that exactly one state flag is True after each transition."""
     # Create original module
     handle = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=ModuleTimestamp(),
         outputs=ModuleOutputs(stdout=[TextOutput('ABC')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={
                 'DS1': DatasetDescriptor(identifier='ID2', name='ID2')
             }
         )
     )
     state_flags = (
         'is_pending',
         'is_canceled',
         'is_error',
         'is_running',
         'is_success',
     )

     def assert_only(expected_flag):
         # Only the expected flag may be set; all other flags must be False
         for flag in state_flags:
             value = getattr(handle, flag)
             if flag == expected_flag:
                 self.assertTrue(value)
             else:
                 self.assertFalse(value)

     # Pending
     assert_only('is_pending')
     # Running
     handle.set_running(external_form='TEST MODULE')
     assert_only('is_running')
     # Canceled
     handle.set_canceled()
     assert_only('is_canceled')
     # Error
     handle.set_error()
     assert_only('is_error')
     # Success
     handle.set_success()
     assert_only('is_success')
Exemplo n.º 12
0
    def __init__(self,
            identifier: str,
            command: ModuleCommand,
            external_form: str,
            module_path: str,
            state: int = mstate.MODULE_PENDING,
            timestamp: Optional[ModuleTimestamp] = None,
            outputs: Optional[ModuleOutputs] = None,
            provenance: Optional[ModuleProvenance] = None,
            object_store: Optional[ObjectStore] = None
        ):
        """Initialize the module handle. For new modules, datasets and outputs
        are initially empty.

        The timestamp, outputs, provenance, and object store arguments default
        to None and a fresh object is created for each handle when they are
        omitted. Default values in the signature are evaluated only once, when
        the function is defined, so using object instances as defaults would
        make all handles share the same default objects.

        Parameters
        ----------
        identifier : string
            Unique module identifier
        command : vizier.viztrail.command.ModuleCommand
            Specification of the module (i.e., package, name, and arguments)
        external_form: string
            Printable representation of module command
        module_path: string
            Path to module resource in object store
        state: int
            Module state (one of PENDING, RUNNING, CANCELED, ERROR, SUCCESS)
        timestamp: vizier.viztrail.module.timestamp.ModuleTimestamp, optional
            Module timestamp
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Module output streams STDOUT and STDERR
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written by
            previous execution of the module.
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources
        """
        # Resolve omitted components here so that the super class always
        # receives per-instance objects.
        super(OSModuleHandle, self).__init__(
            identifier=identifier,
            command=command,
            external_form=external_form,
            state=state,
            timestamp=(
                timestamp if timestamp is not None else ModuleTimestamp()
            ),
            outputs=outputs if outputs is not None else ModuleOutputs(),
            provenance=(
                provenance if provenance is not None else ModuleProvenance()
            ),
        )
        self.module_path = module_path
        self.object_store = (
            object_store if object_store is not None else DefaultObjectStore()
        )
Exemplo n.º 13
0
    def __init__(self,
                 command,
                 external_form,
                 identifier=None,
                 state=None,
                 timestamp=None,
                 datasets=None,
                 outputs=None,
                 provenance=None):
        """Initialize the module handle. Every optional component that is not
        given is replaced by an empty default.

        Parameters
        ----------
        command : vizier.viztrail.command.ModuleCommand
            Specification of the module (i.e., package, name, and arguments)
        external_form: string
            Printable representation of module command
        identifier : string, optional
            Unique module identifier
        state: int
            Module state (one of PENDING, RUNNING, CANCELED, ERROR, SUCCESS).
            Defaults to PENDING if None.
        timestamp: vizier.viztrail.module.timestamp.ModuleTimestamp, optional
            Module timestamp
        datasets : dict(vizier.datastore.dataset.DatasetDescriptor), optional
            Dictionary of resulting datasets. The user-specified name is the key
            and the unique dataset identifier the value.
        outputs: vizier.viztrail.module.output.ModuleOutputs, optional
            Module output streams STDOUT and STDERR
        provenance: vizier.viztrail.module.provenance.ModuleProvenance, optional
            Provenance information about datasets that were read and written by
            previous execution of the module.
        """
        if state is None:
            state = MODULE_PENDING
        super(ModuleHandle, self).__init__(state=state)
        self.identifier = identifier
        self.command = command
        self.external_form = external_form
        # Replace omitted components by empty defaults
        if datasets is None:
            datasets = dict()
        self.datasets = datasets
        if outputs is None:
            outputs = ModuleOutputs()
        self.outputs = outputs
        if provenance is None:
            provenance = ModuleProvenance()
        self.provenance = provenance
        if timestamp is None:
            timestamp = ModuleTimestamp()
        self.timestamp = timestamp
 def test_multi_append(self):
     """Append ten modules to a viztrail branch, one workflow version per
     module, and verify every version after reloading the viztrail.
     """
     repo_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(repo_dir)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=repo_dir
     )
     branch = viztrail.get_default_branch()
     # Append ten modules
     for i in range(10):
         now = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         cell = python_cell(source=source)
         handle = OSModuleHandle.create_module(
             command=cell,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=viztrail.modules_folder,
             object_store=viztrail.object_store
         )
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=cell
         )
         self.assertEqual(len(branch.get_history()), (i + 1))
     viztrail = OSViztrailHandle.load_viztrail(repo_dir)
     branch = viztrail.get_default_branch()
     versions = branch.get_history()
     self.assertEqual(len(versions), 10)
     # Version i is expected to contain the first i + 1 modules
     for i in range(10):
         workflow = branch.get_workflow(versions[i].identifier)
         self.assertEqual(len(workflow.modules), (i + 1))
         for pos in range(i + 1):
             loaded = workflow.modules[pos]
             expected_form = 'print ' + str(pos) + '+' + str(pos)
             self.assertEqual(loaded.external_form, expected_form)
             self.assertEqual(
                 loaded.outputs.stdout[-1].value,
                 str(pos + pos)
             )
Exemplo n.º 15
0
 def test_create_and_delete_branch_with_default_workflow(self):
     """Create a branch for which the modules of the head workflow are
     given and verify the branch before and after reloading the viztrail.
     """
     repo_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(repo_dir)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='DEF',
         properties={PROPERTY_NAME: 'My Viztrail'},
         base_path=repo_dir
     )
     self.assertEqual(
         viztrail.last_modified_at,
         viztrail.default_branch.last_modified_at
     )
     # Create five modules and keep their identifiers
     module_ids = [
         OSModuleHandle.create_module(
             command=python_cell(source='print ' + str(i)),
             external_form='TEST MODULE ' + str(i),
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(),
             datasets=dict(),
             module_folder=viztrail.modules_folder,
         ).identifier
         for i in range(5)
     ]
     new_branch = viztrail.create_branch(
         properties={PROPERTY_NAME: 'My Branch'},
         modules=module_ids
     )
     self.assertIsNotNone(new_branch.head)
     self.assertEqual(len(new_branch.workflows), 1)
     # The reloaded branch must contain the same single workflow
     viztrail = OSViztrailHandle.load_viztrail(repo_dir)
     new_branch = viztrail.get_branch(new_branch.identifier)
     self.assertIsNotNone(new_branch.head)
     self.assertEqual(len(new_branch.workflows), 1)
     workflow = new_branch.get_workflow(new_branch.head.identifier)
     self.assertEqual(len(workflow.modules), 5)
     for i in range(5):
         expected_form = 'TEST MODULE ' + str(i)
         self.assertEqual(workflow.modules[i].external_form, expected_form)
     # NOTE(review): the same comparison is asserted twice; the second
     # check looks redundant -- confirm whether another pair of values was
     # intended.
     self.assertEqual(viztrail.last_modified_at, new_branch.last_modified_at)
     self.assertEqual(viztrail.last_modified_at, new_branch.last_modified_at)
 def test_load_with_missing_modules(self):
     """Load a workflow for which the file of one module is missing and
     verify that the workflow and the affected module are in error state.
     """
     repo_dir = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(repo_dir)
     viztrail = OSViztrailHandle.create_viztrail(
         identifier='ABC',
         properties=None,
         base_path=repo_dir
     )
     branch = viztrail.get_default_branch()
     # Append five modules, one workflow version per module
     for i in range(5):
         now = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         cell = python_cell(source=source)
         handle = OSModuleHandle.create_module(
             command=cell,
             external_form=source,
             state=MODULE_SUCCESS,
             datasets=dict(),
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(
                 created_at=now,
                 started_at=now,
                 finished_at=now
             ),
             module_folder=viztrail.modules_folder,
             object_store=viztrail.object_store
         )
         if branch.head is None:
             modules = [handle]
         else:
             modules = branch.head.modules + [handle]
         branch.append_workflow(
             modules=modules,
             action=ACTION_INSERT,
             command=cell
         )
         self.assertEqual(len(branch.get_history()), (i + 1))
     # Remove the file of the third module to simulate an error condition
     # in which a module file was not written properly
     os.remove(branch.head.modules[2].module_path)
     removed_exists = os.path.isfile(branch.head.modules[2].module_path)
     self.assertFalse(removed_exists)
     viztrail = OSViztrailHandle.load_viztrail(repo_dir)
     branch = viztrail.get_default_branch()
     self.assertTrue(branch.head.get_state().is_error)
     self.assertTrue(branch.head.modules[2].is_error)
 def test_safe_write(self):
     """Check module state handling when a state update cannot be written.

     A pending module is set to running and then to success with an output
     stream that contains a None entry. The in-memory handle is expected to
     report an error afterwards, while the module re-read from its path is
     expected to still be in running state.
     """
     # Build the pieces of the original (pending) module
     cell = python_cell(source='print 2+2')
     stamp = ModuleTimestamp()
     initial_outputs = ModuleOutputs(stdout=[TextOutput('ABC')])
     written = DatasetDescriptor(identifier='ID2', name='ID2')
     prov = ModuleProvenance(read={'DS1': 'ID1'}, write={'DS1': written})
     module = OSModuleHandle.create_module(command=cell,
                                           external_form='TEST MODULE',
                                           state=MODULE_PENDING,
                                           module_folder=MODULE_DIR,
                                           timestamp=stamp,
                                           outputs=initial_outputs,
                                           provenance=prov)
     self.assertTrue(module.is_pending)
     module.set_running(external_form='TEST MODULE')
     self.assertTrue(module.is_running)
     # The None entry in stderr is what triggers the write error
     # (presumably it cannot be serialized -- confirm against set_success)
     broken_outputs = ModuleOutputs(stderr=[None])
     module.set_success(outputs=broken_outputs)
     self.assertTrue(module.is_error)
     # The persisted module must be unchanged by the failed update
     reloaded = OSModuleHandle.load_module(identifier=module.identifier,
                                           module_path=module.module_path)
     self.assertTrue(reloaded.is_running)
Exemplo n.º 18
0
 def test_create_branch_of_active_workflow(self):
     """Ensure that an exception is raised when attempting to branch off a
     workflow with active modules. None of the branch resources should be
     created.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(
         identifier='DEF',
         properties={PROPERTY_NAME: 'My Viztrail'},
         base_path=base_path)
     branches_dir = os.path.join(base_path, viztrail.FOLDER_BRANCHES)
     # Create one branch and make sure its folder exists
     branch = vt.create_branch(properties={PROPERTY_NAME: 'My Branch'})
     self.assertTrue(
         os.path.isdir(os.path.join(branches_dir, branch.identifier)))
     # Snapshot of the branches folder before the failing request
     entries_before = os.listdir(branches_dir)
     # Create five modules and collect their identifiers. The last one is
     # switched to running state below, which makes the workflow active
     module_ids = list()
     last_module = None
     for i in range(5):
         last_module = OSModuleHandle.create_module(
             command=python_cell(source='print ' + str(i)),
             external_form='TEST MODULE ' + str(i),
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(),
             datasets=dict(),
             module_folder=vt.modules_folder,
         )
         module_ids.append(last_module.identifier)
     last_module.set_running(external_form='TEST MODULE')
     with self.assertRaises(ValueError):
         vt.create_branch(properties={PROPERTY_NAME: 'My Branch'},
                          modules=module_ids)
     # Ensure that no additional entry in the branches folder is created
     entries_after = os.listdir(branches_dir)
     self.assertEqual(len(entries_before), len(entries_after))
Exemplo n.º 19
0
 def test_read_write_module(self):
     """Write a pending module for a plot command and read it back.

     The re-read module is expected to have an existing module file, the
     original external form, and pending state.
     """
     # Series definitions with a decreasing number of optional properties
     series = [
         {'column': 1, 'range': '0:50', 'label': 'A'},
         {'column': 2, 'range': '51:100', 'label': 'B'},
         {'column': 3, 'label': 'C'},
         {'column': 4},
     ]
     plot_command = create_plot(
         dataset_name='dataset',
         chart_name='My Chart',
         series=series,
         chart_type='bar',
         chart_grouped=False,
         xaxis_range='0:100',
         xaxis_column=None,
     )
     written = OSModuleHandle.create_module(
         command=plot_command,
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR,
     )
     # Read the module from the path it was written to
     loaded = OSModuleHandle.load_module(identifier=written.identifier,
                                         module_path=written.module_path)
     self.assertTrue(os.path.isfile(loaded.module_path))
     self.assertEqual(loaded.external_form, 'TEST MODULE')
     self.assertTrue(loaded.is_pending)
Exemplo n.º 20
0
 def test_branch_cache(self):
     """Test the workflow cache that is maintained by a viztrail branch.

     The first appended workflow (wf) is tracked through the test:

     - it is not in the cache right after it was appended,
     - it is in the cache while the next DEFAULT_CACHE_SIZE workflows are
       appended (the cache grows by one entry per appended workflow),
     - it is no longer in the cache after one further workflow is appended
       (the cache size stays at DEFAULT_CACHE_SIZE).

     After reloading the viztrail the cache is expected to be empty.
     Fetching wf puts it into the cache; fetching all other workflows in
     the branch history is expected to push it out again.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties={},
                                           base_path=base_path)
     branch = vt.get_default_branch()
     command = python_cell(source='print 2+2')

     def new_module():
         """Create a module in success state for the test command."""
         return OSModuleHandle.create_module(
             command=command,
             external_form='print 2+2',
             state=MODULE_SUCCESS,
             timestamp=ModuleTimestamp(created_at=get_current_time(),
                                       started_at=get_current_time(),
                                       finished_at=get_current_time()),
             outputs=ModuleOutputs(stdout=[TextOutput('4')]),
             provenance=ModuleProvenance(),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)

     def cached_ids(b):
         """Get identifiers of all workflows in the cache of branch b."""
         return [w.identifier for w in b.cache]

     module = new_module()
     wf = branch.append_workflow(modules=[module],
                                 action=ACTION_INSERT,
                                 command=command)
     self.assertNotIn(wf.identifier, cached_ids(branch))
     # Fill the cache. Every appended workflow adds one cache entry and the
     # first workflow remains cached.
     for i in range(DEFAULT_CACHE_SIZE):
         module = new_module()
         branch.append_workflow(modules=branch.head.modules + [module],
                                action=ACTION_INSERT,
                                command=command)
         self.assertEqual(len(branch.cache), (i + 1))
         self.assertIn(wf.identifier, cached_ids(branch))
     # One more workflow exceeds the cache size. The first workflow is
     # expected to be evicted.
     module = new_module()
     branch.append_workflow(modules=branch.head.modules + [module],
                            action=ACTION_INSERT,
                            command=command)
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
     self.assertNotIn(wf.identifier, cached_ids(branch))
     # Reload the viztrail. The cache starts empty and is filled as
     # workflows are fetched.
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     self.assertEqual(len(branch.cache), 0)
     self.assertNotIn(wf.identifier, cached_ids(branch))
     branch.get_workflow(wf.identifier)
     self.assertIn(wf.identifier, cached_ids(branch))
     # Fetching every other workflow in the history evicts the first one
     for wf_desc in branch.get_history():
         if wf_desc.identifier != wf.identifier:
             branch.get_workflow(wf_desc.identifier)
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
     self.assertNotIn(wf.identifier, cached_ids(branch))
Exemplo n.º 21
0
 def test_cache_active_workflows(self):
     """Test caching for workflows that are active.

     The first workflow (wf) contains a pending module. While that module
     is pending, wf is expected to stay in the branch cache even when the
     cache grows to DEFAULT_CACHE_SIZE + 1 entries. After the pending
     module is set to error state, appending one more workflow is expected
     to shrink the cache to DEFAULT_CACHE_SIZE entries, removing wf and the
     workflow at cache position 1 while keeping the one at position 2.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties={},
                                           base_path=base_path)
     branch = vt.get_default_branch()
     command = python_cell(source='print 2+2')

     def new_success_module():
         """Create a module in success state for the test command."""
         return OSModuleHandle.create_module(
             command=command,
             external_form='print 2+2',
             state=MODULE_SUCCESS,
             timestamp=ModuleTimestamp(created_at=get_current_time(),
                                       started_at=get_current_time(),
                                       finished_at=get_current_time()),
             outputs=ModuleOutputs(stdout=[TextOutput('4')]),
             provenance=ModuleProvenance(),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)

     def cached_ids():
         """Get identifiers of all workflows in the branch cache."""
         return [w.identifier for w in branch.cache]

     pending_module = OSModuleHandle.create_module(
         command=command,
         external_form='print 2+2',
         state=MODULE_PENDING,
         timestamp=ModuleTimestamp(created_at=get_current_time()),
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         module_folder=vt.modules_folder,
         object_store=vt.object_store)
     wf = branch.append_workflow(modules=[pending_module],
                                 action=ACTION_INSERT,
                                 command=command)
     self.assertNotIn(wf.identifier, cached_ids())
     # Fill the cache. Every appended workflow adds one cache entry and the
     # active workflow remains cached.
     for i in range(DEFAULT_CACHE_SIZE):
         module = new_success_module()
         branch.append_workflow(modules=branch.head.modules + [module],
                                action=ACTION_INSERT,
                                command=command)
         self.assertEqual(len(branch.cache), (i + 1))
         self.assertIn(wf.identifier, cached_ids())
     module = new_success_module()
     branch.append_workflow(modules=branch.head.modules + [module],
                            action=ACTION_INSERT,
                            command=command)
     # The active workflow should not be removed, i.e., the cache is
     # allowed to exceed its default size by one entry
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE + 1)
     self.assertIn(wf.identifier, cached_ids())
     # Set module state to error and append another workflow. This should
     # evict two workflows
     second_wf = branch.cache[1]
     third_wf = branch.cache[2]
     pending_module.set_error()
     module = new_success_module()
     branch.append_workflow(modules=branch.head.modules + [module],
                            action=ACTION_INSERT,
                            command=command)
     # The first workflow is no longer active. It is expected to be evicted
     # together with the second workflow, while the third one stays cached.
     self.assertEqual(len(branch.cache), DEFAULT_CACHE_SIZE)
     self.assertNotIn(wf.identifier, cached_ids())
     self.assertNotIn(second_wf.identifier, cached_ids())
     self.assertIn(third_wf.identifier, cached_ids())
Exemplo n.º 22
0
    def append_workflow_module(self, project_id, branch_id, command):
        """Append a module to the workflow at the head of a viztrail branch.

        The modified workflow becomes the new head of the branch. If the
        branch head is not active and the backend can execute the command
        synchronously, the command is run immediately and the completed
        module is appended. Otherwise, a module in pending, canceled, or
        backend-defined state is appended and (unless the workflow is active
        or the module was canceled) handed to execute_module.

        Parameters
        ----------
        project_id: string
            Unique project identifier
        branch_id : string
            Unique branch identifier
        command : vizier.viztrail.command.ModuleCommand
            Specification of the command that is to be executed by the
            appended workflow module

        Returns
        -------
        vizier.viztrail.module.base.ModuleHandle
            Handle for the new module in the modified workflow. The result is
            None if the specified project or branch do not exist.
        """
        with self.backend.lock:
            branch = self.projects.get_branch(project_id=project_id,
                                              branch_id=branch_id)
            # Unknown project or branch
            if branch is None:
                return None
            # Derive the current database state, module list and workflow
            # status from the branch head. An empty or missing head yields an
            # empty state.
            head = branch.get_head()
            if head is None or len(head.modules) == 0:
                datasets = dict()
                modules = list()
                is_active = False
                is_error = False
            else:
                datasets = head.modules[-1].datasets
                modules = head.modules
                is_active = head.is_active
                last_module = head.modules[-1]
                is_error = last_module.is_error or last_module.is_canceled
            # External (human-readable) representation of the command
            package = self.packages[command.package_id]
            external_form = command.to_external_form(
                command=package.get(command.command_id),
                datasets=datasets)
            if not is_active and self.backend.can_execute(command):
                # Synchronous execution: run the command right away and
                # append the completed module.
                ts_start = get_current_time()
                task = TaskHandle(task_id=get_unique_identifier(),
                                  project_id=project_id,
                                  controller=self)
                result = self.backend.execute(task=task,
                                              command=command,
                                              context=task_context(datasets))
                ts = ModuleTimestamp(created_at=ts_start,
                                     started_at=ts_start,
                                     finished_at=get_current_time())
                if result.is_success:
                    # The new database state results from applying the
                    # module provenance to the state of the previous module
                    if len(modules) > 0:
                        previous_state = modules[-1].datasets
                    else:
                        previous_state = dict()
                    module = ModuleHandle(
                        state=mstate.MODULE_SUCCESS,
                        command=command,
                        external_form=external_form,
                        timestamp=ts,
                        datasets=result.provenance.get_database_state(
                            previous_state),
                        outputs=result.outputs,
                        provenance=result.provenance)
                else:
                    module = ModuleHandle(state=mstate.MODULE_ERROR,
                                          command=command,
                                          external_form=external_form,
                                          timestamp=ts,
                                          outputs=result.outputs)
                workflow = branch.append_workflow(modules=modules,
                                                  action=wf.ACTION_APPEND,
                                                  command=command,
                                                  pending_modules=[module])
            else:
                # Deferred execution: the state of the appended module is
                # pending for an active workflow, canceled if the previous
                # module failed or was canceled, and determined by the
                # backend otherwise.
                if is_active:
                    state = mstate.MODULE_PENDING
                elif is_error:
                    state = mstate.MODULE_CANCELED
                else:
                    state = self.backend.next_task_state()
                appended = ModuleHandle(state=state,
                                        command=command,
                                        external_form=external_form)
                workflow = branch.append_workflow(modules=modules,
                                                  action=wf.ACTION_APPEND,
                                                  command=command,
                                                  pending_modules=[appended])
                # Only start execution if nothing else is running and the
                # module was not canceled
                if not (is_active or state == mstate.MODULE_CANCELED):
                    self.execute_module(project_id=project_id,
                                        branch_id=branch_id,
                                        module=workflow.modules[-1],
                                        datasets=datasets)
        return workflow.modules[-1]
Exemplo n.º 23
0
    def load_module(identifier,
                    module_path,
                    prev_state=None,
                    object_store=None):
        """Read a module from the object store and create a handle for it.

        Parameters
        ----------
        identifier: string
            Unique module identifier
        module_path: string
            Resource path for module object
        prev_state: dict(string: vizier.datastore.dataset.DatasetDescriptor)
            Dataset descriptors keyed by the user-provided name that exist in
            the database state of the previous module (in sequence of
            occurrence in the workflow)
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources. A
            DefaultObjectStore is used if omitted.

        Returns
        -------
        vizier.viztrail.objectstore.module.OSModuleHandle
        """
        store = object_store
        if store is None:
            store = DefaultObjectStore()
        # A ValueError from the store signals that the module object does not
        # exist (a system error condition). The result then is a placeholder
        # module in error state.
        try:
            obj = store.read_object(object_path=module_path)
        except ValueError:
            unknown_command = ModuleCommand(package_id=UNKNOWN_ID,
                                            command_id=UNKNOWN_ID)
            return OSModuleHandle(
                identifier=identifier,
                command=unknown_command,
                external_form='fatal error: object not found',
                module_path=module_path,
                state=mstate.MODULE_ERROR,
                object_store=store)
        # Module command
        cmd_doc = obj[KEY_COMMAND]
        command = ModuleCommand(package_id=cmd_doc[KEY_PACKAGE_ID],
                                command_id=cmd_doc[KEY_COMMAND_ID],
                                arguments=cmd_doc[KEY_ARGUMENTS])
        # Module timestamps. Only the creation time is mandatory.
        ts_doc = obj[KEY_TIMESTAMP]
        created_at = to_datetime(ts_doc[KEY_CREATED_AT])
        started_at = None
        if KEY_STARTED_AT in ts_doc:
            started_at = to_datetime(ts_doc[KEY_STARTED_AT])
        finished_at = None
        if KEY_FINISHED_AT in ts_doc:
            finished_at = to_datetime(ts_doc[KEY_FINISHED_AT])
        timestamp = ModuleTimestamp(created_at=created_at,
                                    started_at=started_at,
                                    finished_at=finished_at)
        # Module output streams
        out_doc = obj[KEY_OUTPUTS]
        outputs = ModuleOutputs(
            stdout=get_output_stream(out_doc[KEY_STDOUT]),
            stderr=get_output_stream(out_doc[KEY_STDERR]))
        # Module provenance. Every component that is missing in the
        # serialization is passed on as None.
        prov_doc = obj[KEY_PROVENANCE]
        read_prov = None
        if KEY_PROVENANCE_READ in prov_doc:
            read_prov = dict()
            for entry in prov_doc[KEY_PROVENANCE_READ]:
                read_prov[entry[KEY_DATASET_NAME]] = entry[KEY_DATASET_ID]
        write_prov = None
        if KEY_PROVENANCE_WRITE in prov_doc:
            write_prov = dict()
            for entry in prov_doc[KEY_PROVENANCE_WRITE]:
                columns = [
                    DatasetColumn(identifier=col[KEY_COLUMN_ID],
                                  name=col[KEY_COLUMN_NAME],
                                  data_type=col[KEY_COLUMN_TYPE])
                    for col in entry[KEY_DATASET_COLUMNS]
                ]
                write_prov[entry[KEY_DATASET_NAME]] = DatasetDescriptor(
                    identifier=entry[KEY_DATASET_ID],
                    columns=columns,
                    row_count=entry[KEY_DATASET_ROWCOUNT])
        delete_prov = None
        if KEY_PROVENANCE_DELETE in prov_doc:
            delete_prov = prov_doc[KEY_PROVENANCE_DELETE]
        res_prov = None
        if KEY_PROVENANCE_RESOURCES in prov_doc:
            res_prov = prov_doc[KEY_PROVENANCE_RESOURCES]
        charts_prov = None
        if KEY_PROVENANCE_CHARTS in prov_doc:
            charts_prov = [
                ChartViewHandle.from_dict(chart)
                for chart in prov_doc[KEY_PROVENANCE_CHARTS]
            ]
        provenance = ModuleProvenance(read=read_prov,
                                      write=write_prov,
                                      delete=delete_prov,
                                      resources=res_prov,
                                      charts=charts_prov)
        # The database state is derived from the provenance only for modules
        # in SUCCESS state and only if the previous state is known. In all
        # other cases the state is empty.
        state = obj[KEY_STATE]
        if state == mstate.MODULE_SUCCESS and prev_state is not None:
            datasets = provenance.get_database_state(prev_state)
        else:
            datasets = dict()
        return OSModuleHandle(identifier=identifier,
                              command=command,
                              external_form=obj[KEY_EXTERNAL_FORM],
                              module_path=module_path,
                              state=obj[KEY_STATE],
                              timestamp=timestamp,
                              datasets=datasets,
                              outputs=outputs,
                              provenance=provenance,
                              object_store=store)
Exemplo n.º 24
0
    def load_module(
            identifier: str,
            module_path: str,
            prev_state: Optional[Dict[str, ArtifactDescriptor]] = None,
            object_store: Optional[ObjectStore] = None
        ) -> "OSModuleHandle":
        """Load module from given object store.

        Parameters
        ----------
        identifier: string
            Unique module identifier
        module_path: string
            Resource path for module object
        prev_state: dict(string: vizier.datastore.dataset.DatasetDescriptor)
            Dataset descriptors keyed by the user-provided name that exist in
            the database state of the previous module (in sequence of
            occurrence in the workflow). This argument is not referenced by
            the current implementation.
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources. A
            new DefaultObjectStore is created if the argument is omitted or
            None.

        Returns
        -------
        vizier.viztrail.objectstore.module.OSModuleHandle
        """
        # Make sure the object store is not None. The default store is
        # created per call rather than in the signature, where it would be
        # instantiated once at definition time and shared by all calls.
        if object_store is None:
            object_store = DefaultObjectStore()
        # Read object from store. This may raise a ValueError to indicate that
        # the module does not exist (in a system error condition). In this
        # case we return a new module that is in error state.
        try:
            obj = cast(Dict[str, Any], object_store.read_object(object_path=module_path))
        except ValueError:
            return OSModuleHandle(
                identifier=identifier,
                command=ModuleCommand(
                    package_id=UNKNOWN_ID,
                    command_id=UNKNOWN_ID,
                    arguments=list(),
                    packages=None
                ),
                external_form='fatal error: object not found',
                module_path=module_path,
                state=mstate.MODULE_ERROR,
                object_store=object_store
            )
        # Create module command
        command = ModuleCommand(
            package_id=obj[KEY_COMMAND][KEY_PACKAGE_ID],
            command_id=obj[KEY_COMMAND][KEY_COMMAND_ID],
            arguments=obj[KEY_COMMAND][KEY_ARGUMENTS],
            packages=None
        )
        # Create module timestamps. Only the creation time is mandatory.
        created_at = to_datetime(obj[KEY_TIMESTAMP][KEY_CREATED_AT])
        if KEY_STARTED_AT in obj[KEY_TIMESTAMP]:
            started_at: Optional[datetime] = to_datetime(obj[KEY_TIMESTAMP][KEY_STARTED_AT])
        else:
            started_at = None
        if KEY_FINISHED_AT in obj[KEY_TIMESTAMP]:
            finished_at: Optional[datetime] = to_datetime(obj[KEY_TIMESTAMP][KEY_FINISHED_AT])
        else:
            finished_at = None
        timestamp = ModuleTimestamp(
            created_at=created_at,
            started_at=started_at,
            finished_at=finished_at
        )
        # Create module output streams.
        outputs = ModuleOutputs(
            stdout=get_output_stream(obj[KEY_OUTPUTS][KEY_STDOUT]),
            stderr=get_output_stream(obj[KEY_OUTPUTS][KEY_STDERR])
        )
        # Create module provenance information. Read and write provenance
        # remain None if they are missing in the serialization.
        read_prov = None
        if KEY_PROVENANCE_READ in obj[KEY_PROVENANCE]:
            read_prov = dict()
            for ds in obj[KEY_PROVENANCE][KEY_PROVENANCE_READ]:
                read_prov[ds[KEY_DATASET_NAME]] = ds[KEY_DATASET_ID]
        write_prov = None
        if KEY_PROVENANCE_WRITE in obj[KEY_PROVENANCE]:
            write_prov = dict()
            for ds in obj[KEY_PROVENANCE][KEY_PROVENANCE_WRITE]:
                # Entries that carry a data object type are generic
                # artifacts; all other entries are datasets with columns.
                if KEY_DATAOBJECT_TYPE in ds:
                    descriptor = ArtifactDescriptor(
                        identifier=ds[KEY_DATAOBJECT_ID],
                        name=ds[KEY_DATAOBJECT_NAME],
                        artifact_type=ds[KEY_DATAOBJECT_TYPE])
                else:
                    descriptor = DatasetDescriptor(
                        identifier=ds[KEY_DATASET_ID],
                        name=ds[KEY_DATASET_NAME],
                        columns=[
                            DatasetColumn(
                                identifier=col[KEY_COLUMN_ID],
                                name=col[KEY_COLUMN_NAME],
                                data_type=col[KEY_COLUMN_TYPE]
                            ) for col in ds[KEY_DATASET_COLUMNS]
                        ]
                    )
                # NOTE(review): artifact entries are keyed by KEY_DATASET_NAME
                # while their descriptor name is read from
                # KEY_DATAOBJECT_NAME -- confirm that both keys are present
                # (or identical) for artifact entries.
                write_prov[ds[KEY_DATASET_NAME]] = descriptor
        # Delete, resource and chart provenance default to empty collections
        if KEY_PROVENANCE_DELETE in obj[KEY_PROVENANCE]:
            delete_prov = set(obj[KEY_PROVENANCE][KEY_PROVENANCE_DELETE])
        else:
            delete_prov = set()
        if KEY_PROVENANCE_RESOURCES in obj[KEY_PROVENANCE]:
            res_prov = cast(Dict[str, Any], obj[KEY_PROVENANCE][KEY_PROVENANCE_RESOURCES])
        else:
            res_prov = dict()
        if KEY_PROVENANCE_CHARTS in obj[KEY_PROVENANCE]:
            # Charts are serialized either as [name, chart] lists or as a
            # plain chart dictionary. The latter is given the name "Chart".
            charts_prov = [
                (
                    c[0],
                    ChartViewHandle.from_dict(c[1])  # type: ignore[no-untyped-call]
                ) if isinstance(c, list) else
                (
                    "Chart",
                    ChartViewHandle.from_dict(c)
                )
                for c in obj[KEY_PROVENANCE][KEY_PROVENANCE_CHARTS]
            ]
        else:
            charts_prov = list()
        provenance = ModuleProvenance(
            read=read_prov,
            write=write_prov,
            delete=delete_prov,
            resources=res_prov,
            charts=charts_prov
        )
        # Return module handle
        return OSModuleHandle(
            identifier=identifier,
            command=command,
            external_form=obj[KEY_EXTERNAL_FORM],
            module_path=module_path,
            state=obj[KEY_STATE],
            timestamp=timestamp,
            outputs=outputs,
            provenance=provenance,
            object_store=object_store,
        )
Exemplo n.º 25
0
 def test_outputs(self):
     """Write modules with different output streams and read them back.

     Covers empty outputs, error output only, standard output only, and
     both streams together.
     """

     def store_and_reload(outputs):
         """Create a pending module with the given outputs and load it
         again from the path it was written to.
         """
         created = OSModuleHandle.create_module(
             command=python_cell(source='print 2+2'),
             external_form='TEST MODULE',
             state=MODULE_PENDING,
             outputs=outputs,
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(),
             module_folder=MODULE_DIR)
         return OSModuleHandle.load_module(identifier=created.identifier,
                                           module_path=created.module_path)

     # Module without any output
     m = store_and_reload(ModuleOutputs())
     self.assertEqual(len(m.outputs.stderr), 0)
     self.assertEqual(len(m.outputs.stdout), 0)
     # Module with error output
     m = store_and_reload(ModuleOutputs(stderr=[TextOutput('Some text')]))
     self.assertEqual(len(m.outputs.stderr), 1)
     error_line = m.outputs.stderr[0]
     self.assertTrue(error_line.is_text)
     self.assertEqual(error_line.value, 'Some text')
     self.assertEqual(len(m.outputs.stdout), 0)
     # Module with standard output (one text and one non-text object)
     m = store_and_reload(
         ModuleOutputs(stdout=[
             TextOutput('Some text'),
             OutputObject(type='chart', value='123')
         ]))
     self.assertEqual(len(m.outputs.stdout), 2)
     text_out, chart_out = m.outputs.stdout[0], m.outputs.stdout[1]
     self.assertTrue(text_out.is_text)
     self.assertEqual(text_out.value, 'Some text')
     self.assertFalse(chart_out.is_text)
     self.assertEqual(chart_out.value, '123')
     self.assertEqual(len(m.outputs.stderr), 0)
     # Module with standard error and standard output
     m = store_and_reload(
         ModuleOutputs(stderr=[TextOutput('Some text')],
                       stdout=[
                           TextOutput('Some text'),
                           OutputObject(type='chart', value='123')
                       ]))
     self.assertEqual(len(m.outputs.stdout), 2)
     self.assertEqual(len(m.outputs.stderr), 1)
 def test_success(self) -> None:
     """Drive a pending module to the success state and verify persistence.

     The module is created pending, set running, and then marked successful
     twice: first with ``set_success()`` called without arguments, then with
     an explicit finish time, outputs, and provenance. After each call the
     same expectations are checked on the in-memory handle and on a handle
     re-read through ``OSModuleHandle.load_module``.
     """
     def expect_plain_success(handle):
         # Expected state after set_success() without arguments: both
         # timestamps are set, and the outputs and read/write provenance
         # supplied at creation are no longer present.
         self.assertTrue(handle.is_success)
         self.assertIsNotNone(handle.timestamp.started_at)
         self.assertIsNotNone(handle.timestamp.finished_at)
         self.assertEqual(len(handle.outputs.stderr), 0)
         self.assertEqual(len(handle.outputs.stdout), 0)
         self.assertTrue(handle.provenance.read == {})
         self.assertTrue(handle.provenance.write == {})

     def expect_detailed_success(handle, finished):
         # Expected state after set_success() with all optional arguments:
         # the given finish time, one stdout entry, and the given provenance.
         self.assertTrue(handle.is_success)
         self.assertIsNotNone(handle.timestamp.started_at)
         self.assertIsNotNone(handle.timestamp.finished_at)
         self.assertEqual(handle.timestamp.finished_at, finished)
         self.assertEqual(len(handle.outputs.stderr), 0)
         self.assertEqual(len(handle.outputs.stdout), 1)
         self.assertEqual(handle.outputs.stdout[0].value, 'XYZ')
         self.assertIsNotNone(handle.provenance.read)
         self.assertEqual(handle.provenance.read['DS1'], 'ID1')
         self.assertIsNotNone(handle.provenance.write)
         self.assertEqual(handle.provenance.write['DS1'].identifier, 'ID2')

     # Pending module that starts out with one stdout entry and provenance
     cell = python_cell(source='print 2+2')
     created = ModuleTimestamp()
     initial_outputs = ModuleOutputs(stdout=[TextOutput('ABC')])
     initial_provenance = ModuleProvenance(
         read={'DS1': 'ID1'},
         write={'DS1': DatasetDescriptor(identifier='ID2', name='ID2')})
     module = OSModuleHandle.create_module(
         command=cell,
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         timestamp=created,
         outputs=initial_outputs,
         provenance=initial_provenance)
     self.assertTrue(module.is_pending)

     # pending -> running -> success (no optional arguments)
     module.set_running(external_form='TEST MODULE')
     module.set_success()
     expect_plain_success(module)
     # Re-read the module from the object store to make sure that all
     # changes have been materialized
     stored = OSModuleHandle.load_module(identifier=module.identifier,
                                         module_path=module.module_path)
     expect_plain_success(stored)

     # Mark success again, this time with every optional argument
     finished = get_current_time()
     stored.set_success(
         finished_at=finished,
         outputs=ModuleOutputs(stdout=[TextOutput('XYZ')]),
         provenance=ModuleProvenance(
             read={'DS1': 'ID1'},
             write={'DS1': DatasetDescriptor(identifier='ID2', name='ID2')}))
     expect_detailed_success(stored, finished)
     # NOTE(review): the module is loaded twice in a row and only the second
     # handle (loaded with an empty prev_state) is inspected -- confirm
     # whether the first load is intentional.
     reloaded = OSModuleHandle.load_module(identifier=stored.identifier,
                                           module_path=stored.module_path)
     reloaded = OSModuleHandle.load_module(identifier=reloaded.identifier,
                                           module_path=reloaded.module_path,
                                           prev_state={})
     expect_detailed_success(reloaded, finished)
 def test_error(self):
     """Put a pending module into the error state and verify persistence.

     ``set_error()`` is exercised first without arguments and then with an
     explicit finish time and stderr output. After each call the in-memory
     handle and a handle re-read through ``OSModuleHandle.load_module`` must
     satisfy the same expectations; the provenance given at creation is
     expected to remain available throughout.
     """
     def expect_provenance_kept(handle):
         # Read, write, and resource provenance from creation must survive
         self.assertIsNotNone(handle.provenance.read)
         self.assertIsNotNone(handle.provenance.write)
         self.assertIsNotNone(handle.provenance.resources)
         self.assertEqual(handle.provenance.resources['fileid'], '0123456789')

     def expect_plain_error(handle):
         # Expected state after set_error() without arguments: a finish time
         # is set and neither output stream has entries.
         self.assertTrue(handle.is_error)
         self.assertIsNotNone(handle.timestamp.finished_at)
         self.assertEqual(len(handle.outputs.stderr), 0)
         self.assertEqual(len(handle.outputs.stdout), 0)
         expect_provenance_kept(handle)

     def expect_reported_error(handle, finished):
         # Expected state after set_error() with a finish time and outputs:
         # the given finish time and exactly one stderr entry.
         self.assertTrue(handle.is_error)
         self.assertIsNotNone(handle.timestamp.finished_at)
         self.assertEqual(handle.timestamp.finished_at, finished)
         self.assertEqual(len(handle.outputs.stderr), 1)
         self.assertEqual(handle.outputs.stderr[0].value, 'Some Error')
         self.assertEqual(len(handle.outputs.stdout), 0)
         expect_provenance_kept(handle)

     # Pending module that starts out with one stdout entry and provenance
     cell = python_cell(source='print 2+2')
     initial_outputs = ModuleOutputs(stdout=[TextOutput('ABC')])
     initial_provenance = ModuleProvenance(
         read={'DS1': 'ID1'},
         write={'DS1': DatasetDescriptor(identifier='ID2', name='ID2')},
         resources={'fileid': '0123456789'})
     created = ModuleTimestamp()
     module = OSModuleHandle.create_module(
         command=cell,
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         module_folder=MODULE_DIR,
         outputs=initial_outputs,
         provenance=initial_provenance,
         timestamp=created)

     # pending -> error (no optional arguments)
     module.set_error()
     expect_plain_error(module)
     # Re-read the module from the object store to make sure that all
     # changes have been materialized
     stored = OSModuleHandle.load_module(identifier=module.identifier,
                                         module_path=module.module_path)
     expect_plain_error(stored)

     # Set error again, this time with timestamp and output information
     finished = get_current_time()
     stored.set_error(
         finished_at=finished,
         outputs=ModuleOutputs(stderr=[TextOutput('Some Error')]))
     expect_reported_error(stored, finished)
     reloaded = OSModuleHandle.load_module(identifier=stored.identifier,
                                           module_path=stored.module_path)
     expect_reported_error(reloaded, finished)
Exemplo n.º 28
0
 def test_provenance(self):
     """Test reading and writing modules with provenance information.

     Each case creates a pending module with a particular ModuleProvenance
     via OSModuleHandle.create_module, reads it back with
     OSModuleHandle.load_module, and checks the provenance on the loaded
     handle. The cases are: empty provenance, read provenance with
     resources, write provenance only, read plus write plus delete, and a
     single chart view.
     """
     # Module with empty provenance. This is the only case that also passes
     # datasets=DATASETS to create_module. All four provenance components
     # are expected to be None on the loaded handle.
     mod0 = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR,
         datasets=DATASETS)
     m = OSModuleHandle.load_module(identifier=mod0.identifier,
                                    module_path=mod0.module_path)
     self.assertIsNone(m.provenance.read)
     self.assertIsNone(m.provenance.write)
     self.assertIsNone(m.provenance.delete)
     self.assertIsNone(m.provenance.resources)
     # Module that only has read provenance (plus one resource entry); the
     # write provenance is expected to stay None.
     mod0 = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(read={'DS1': 'ID1'},
                                     resources={'fileId': '0123456789'}),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR)
     m = OSModuleHandle.load_module(identifier=mod0.identifier,
                                    module_path=mod0.module_path)
     self.assertIsNotNone(m.provenance.read)
     self.assertEqual(len(m.provenance.read), 1)
     self.assertEqual(m.provenance.read['DS1'], 'ID1')
     self.assertEqual(m.provenance.resources['fileId'], '0123456789')
     self.assertIsNone(m.provenance.write)
     # Module that only has write provenance. DATASETS is defined outside
     # this method; the assertions below expect it to contain two entries,
     # 'DS1' and 'DS2', whose identifiers are 'ID1' and 'ID2'.
     mod0 = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(write=DATASETS),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR)
     m = OSModuleHandle.load_module(identifier=mod0.identifier,
                                    module_path=mod0.module_path)
     self.assertIsNotNone(m.provenance.write)
     self.assertEqual(len(m.provenance.write), 2)
     self.assertEqual(m.provenance.write['DS1'].identifier, 'ID1')
     self.assertEqual(m.provenance.write['DS2'].identifier, 'ID2')
     self.assertIsNone(m.provenance.read)
     # Module with read and write provenance and a list of deleted names
     mod0 = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(read={'DS1': 'ID1'},
                                     write=DATASETS,
                                     delete=['A', 'B']),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR)
     m = OSModuleHandle.load_module(identifier=mod0.identifier,
                                    module_path=mod0.module_path)
     self.assertIsNotNone(m.provenance.read)
     self.assertEqual(len(m.provenance.read), 1)
     self.assertEqual(m.provenance.read['DS1'], 'ID1')
     self.assertIsNotNone(m.provenance.write)
     self.assertEqual(len(m.provenance.write), 2)
     self.assertEqual(m.provenance.write['DS1'].identifier, 'ID1')
     self.assertEqual(m.provenance.write['DS2'].identifier, 'ID2')
     self.assertEqual(m.provenance.delete, ['A', 'B'])
     # Module with chart: a single chart view holding three data series. The
     # second series is given no label and the third no range, so the
     # comparison loop at the end also covers whatever values
     # DataSeriesHandle holds for arguments that were not supplied.
     chart = ChartViewHandle(identifier='A',
                             dataset_name='DS1',
                             chart_name='My Chart',
                             data=[
                                 DataSeriesHandle(column='COL1',
                                                  label='SERIES1',
                                                  range_start=0,
                                                  range_end=100),
                                 DataSeriesHandle(column='COL2',
                                                  range_start=101,
                                                  range_end=200),
                                 DataSeriesHandle(column='COL3',
                                                  label='SERIES2')
                             ],
                             x_axis=1,
                             chart_type='bar',
                             grouped_chart=True)
     mod0 = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(charts=[chart]),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR)
     m = OSModuleHandle.load_module(identifier=mod0.identifier,
                                    module_path=mod0.module_path)
     # Compare the loaded chart with the original attribute by attribute
     self.assertEqual(len(m.provenance.charts), 1)
     c = m.provenance.charts[0]
     self.assertEqual(chart.identifier, c.identifier)
     self.assertEqual(chart.dataset_name, c.dataset_name)
     self.assertEqual(chart.chart_name, c.chart_name)
     self.assertEqual(chart.x_axis, c.x_axis)
     self.assertEqual(chart.chart_type, c.chart_type)
     self.assertEqual(chart.grouped_chart, c.grouped_chart)
     self.assertEqual(len(c.data), 3)
     # Series are compared position by position, so their order must be
     # preserved by the write/read round trip
     for i in range(3):
         self.assertEqual(c.data[i].column, chart.data[i].column)
         self.assertEqual(c.data[i].label, chart.data[i].label)
         self.assertEqual(c.data[i].range_start, chart.data[i].range_start)
         self.assertEqual(c.data[i].range_end, chart.data[i].range_end)