Пример #1
0
    def from_dict(obj):
        """Build a project resource from the dictionary serialization that is
        returned by the vizier web service.

        The project name is the value of the first entry in the 'properties'
        list whose key is 'name'. The default branch is the identifier of the
        first entry in the (optional) 'branches' list that is flagged as the
        default.

        Parameters
        ----------
        obj: dict
            Dictionary serialization of a project handle

        Returns
        -------
        vizier.api.client.resources.project.ProjectResource
        """
        # The first property with key 'name' wins. The name is None if no such
        # property exists.
        project_name = next(
            (entry['value']
             for entry in obj['properties'] if entry['key'] == 'name'),
            None
        )
        # Branches are optional in the serialization.
        default_branch = None
        if 'branches' in obj:
            default_branch = next(
                (entry['id'] for entry in obj['branches'] if entry['isDefault']),
                None
            )
        return ProjectResource(
            identifier=obj['id'],
            name=project_name,
            created_at=to_datetime(obj['createdAt']),
            last_modified_at=to_datetime(obj['lastModifiedAt']),
            default_branch=default_branch
        )
Пример #2
0
    def from_dict(obj):
        """Build a branch resource from the dictionary serialization that is
        returned by the vizier web service.

        The branch name is the value of the first entry in the 'properties'
        list whose key is 'name'. The list of workflows is None if the
        serialization does not contain a 'workflows' element.

        Parameters
        ----------
        obj: dict
            Dictionary serialization of a branch descriptor or handle

        Returns
        -------
        vizier.api.client.resources.branch.BranchResource
        """
        # The first property with key 'name' wins. The name is None if no such
        # property exists.
        branch_name = next(
            (entry['value']
             for entry in obj['properties'] if entry['key'] == 'name'),
            None
        )
        # Workflows are only present in some serializations.
        if 'workflows' in obj:
            history = [
                WorkflowResource.from_dict(doc) for doc in obj['workflows']
            ]
        else:
            history = None
        return BranchResource(
            identifier=obj['id'],
            name=branch_name,
            created_at=to_datetime(obj['createdAt']),
            last_modified_at=to_datetime(obj['lastModifiedAt']),
            workflows=history
        )
Пример #3
0
    def from_dict(obj: Dict[str, Any]) -> "ProjectResource":
        """Build a project resource from the dictionary serialization that is
        returned by the vizier web service.

        The project name is read from the 'name' entry of the (optional)
        'properties' dictionary. The default branch is the identifier of the
        first entry in the (optional) 'branches' list that is flagged as the
        default.

        Parameters
        ----------
        obj: dict
            Dictionary serialization of a project handle

        Returns
        -------
        vizier.api.client.resources.project.ProjectResource
        """
        # Properties are a dictionary here. A missing dictionary or a missing
        # 'name' entry both result in a None name.
        properties = obj.get('properties', {})
        project_name = properties.get("name")
        # Branches are optional in the serialization.
        default_branch = None
        if 'branches' in obj:
            default_branch = next(
                (entry['id'] for entry in obj['branches'] if entry['isDefault']),
                None
            )
        return ProjectResource(
            identifier=obj['id'],
            name=project_name,
            created_at=to_datetime(obj['createdAt']),
            last_modified_at=to_datetime(obj['lastModifiedAt']),
            default_branch=default_branch
        )
Пример #4
0
    def from_dict(obj):
        """Build a workflow module resource from the dictionary serialization
        that is returned by the vizier web service.

        Parameters
        ----------
        obj: dict
            Dictionary serialization of a workflow module handle

        Returns
        -------
        vizier.api.client.resources.module.ModuleResource
        """
        # Collect output values. From STDOUT only text and html outputs are
        # kept. All values from STDERR are kept.
        outputs = []
        if 'outputs' in obj:
            streams = obj['outputs']
            outputs.extend(
                line['value'] for line in streams['stdout']
                if line['type'] in (OUTPUT_TEXT, OUTPUT_HTML)
            )
            outputs.extend(line['value'] for line in streams['stderr'])
        # The created-at timestamp is mandatory. Started-at and finished-at are
        # only set if they are present in the serialization.
        ts_doc = obj['timestamps']
        timestamp = ModuleTimestamp(
            created_at=to_datetime(ts_doc['createdAt'])
        )
        optional_times = (
            ('startedAt', 'started_at'),
            ('finishedAt', 'finished_at')
        )
        for label, attribute in optional_times:
            if label in ts_doc:
                setattr(timestamp, attribute, to_datetime(ts_doc[label]))
        # Map names of datasets and data objects to their identifier.
        datasets = {
            doc['name']: doc['id'] for doc in obj.get('datasets', [])
        }
        dataobjects = {
            doc['name']: doc['id'] for doc in obj.get('dataobjects', [])
        }
        # Chart handles are keyed by their name.
        chart_handles = [
            ChartHandle.from_dict(doc) for doc in obj.get('charts', [])
        ]
        charts = {handle.name: handle for handle in chart_handles}
        return ModuleResource(
            identifier=obj['id'],
            state=to_external_form(obj['state']),
            external_form=obj['text'],
            outputs=outputs,
            datasets=datasets,
            dataobjects=dataobjects,
            charts=charts,
            timestamp=timestamp,
            links=deserialize.HATEOAS(links=obj['links'])
        )
Пример #5
0
 def test_timestamps(self):
     """Test reading and writing modules with different timestamp values.

     The test runs in three stages, each followed by a reload of the module:
     (1) set started_at on a module that was created with a default
     timestamp, (2) overwrite created_at and set finished_at, and (3) create
     a new module with explicit created_at and started_at values.
     """
     # Create a pending module with a default timestamp and load it again.
     mod0 = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(),
         module_folder=MODULE_DIR)
     m = OSModuleHandle.load_module(identifier=mod0.identifier,
                                    module_path=mod0.module_path)
     # Test timestamps
     # Stage 1: remember the loaded created_at value, set started_at, write
     # the module and make sure that both values are present after reload.
     created_at = m.timestamp.created_at
     started_at = to_datetime('2018-11-26T13:00:00.000000')
     m.timestamp.started_at = started_at
     m.write_module()
     m = OSModuleHandle.load_module(identifier=mod0.identifier,
                                    module_path=mod0.module_path)
     self.assertEqual(m.timestamp.created_at, created_at)
     self.assertEqual(m.timestamp.started_at, started_at)
     # Stage 2: created_at is overwritten with the same value that is used
     # for finished_at. The previously written started_at is expected to be
     # unchanged after reload.
     finished_at = to_datetime('2018-11-26T13:00:00.000010')
     m.timestamp.created_at = finished_at
     m.timestamp.finished_at = finished_at
     m.write_module()
     m = OSModuleHandle.load_module(identifier=mod0.identifier,
                                    module_path=mod0.module_path)
     self.assertEqual(m.timestamp.created_at, finished_at)
     self.assertEqual(m.timestamp.started_at, started_at)
     self.assertEqual(m.timestamp.finished_at, finished_at)
     # Stage 3: create a second module with explicit created_at and
     # started_at values (from stage 1) but without finished_at.
     mod0 = OSModuleHandle.create_module(
         command=python_cell(source='print 2+2'),
         external_form='TEST MODULE',
         state=MODULE_PENDING,
         outputs=ModuleOutputs(),
         provenance=ModuleProvenance(),
         timestamp=ModuleTimestamp(created_at=created_at,
                                   started_at=started_at),
         module_folder=MODULE_DIR)
     m = OSModuleHandle.load_module(identifier=mod0.identifier,
                                    module_path=mod0.module_path)
     self.assertEqual(m.timestamp.created_at, created_at)
     self.assertEqual(m.timestamp.started_at, started_at)
     # A timestamp that was never set is expected to be None after loading.
     self.assertIsNone(m.timestamp.finished_at)
Пример #6
0
    def from_file(fs_dir, envs):
        """Read the viztrail state from file.

        The viztrail file is parsed as Json first. If that fails the file
        content is parsed as Yaml instead.

        Raises IOError if the viztrail file does not exist.

        Parameters
        ----------
        fs_dir: string
            Base directory where all viztrail information is stored
        envs: dict(string: vizier.config.ExecEnv)
            Dictionary of workflow execution environments

        Returns
        -------
        vizier.workflow.repository.fs.FileSystemViztrailHandle
        """
        # Read the file content once. A missing file raises the IOError here
        # instead of after a failed second attempt to open the same file.
        with open(os.path.join(fs_dir, VIZTRAIL_FILE), 'r') as f:
            content = f.read()
        # Parse viztrail information (Json with a fallback to Yaml). Catch
        # Exception instead of using a bare except so that KeyboardInterrupt
        # and SystemExit are not swallowed by the fallback.
        try:
            doc = load_json(content)
        except Exception:
            # NOTE(review): if CLoader is PyYAML's C implementation of the
            # full Loader it can construct arbitrary Python objects. Confirm
            # that viztrail files are trusted or switch to a safe loader.
            doc = yaml.load(content, Loader=CLoader)
        # Read information about viztrail branches. Each branch has its own
        # properties file and provenance file in the base directory.
        branches = {
            b['id']: ViztrailBranch(
                b['id'],
                FilePropertiesHandler(branch_file(fs_dir, b['id'])),
                FileSystemBranchProvenance(
                    branch_prov_file(fs_dir, b['id'])),
                workflows=[
                    WorkflowVersionDescriptor.from_dict(v)
                    for v in b['versions']
                ])
            for b in doc['branches']
        }
        return FileSystemViztrailHandle(
            doc['id'],
            branches,
            envs[doc['env']],
            FilePropertiesHandler(os.path.join(fs_dir, PROPERTIES_FILE)),
            to_datetime(doc['timestamps']['createdAt']),
            to_datetime(doc['timestamps']['lastModifiedAt']),
            doc['versionCounter'],
            doc['moduleCounter'],
            fs_dir)
Пример #7
0
    def from_dict(obj):
        """Create descriptor instance from dictionary serialization.

        The workflow version and the creation timestamp are mandatory. The
        action, package identifier, and command identifier are optional and
        default to None if they are missing.

        Parameters
        ----------
        obj: dict
            Dictionary serialization of a workflow version descriptor

        Returns
        -------
        vizier.workflow.base.WorkflowVersionDescriptor
        """
        return WorkflowVersionDescriptor(
            obj['version'],
            action=obj.get('action'),
            package_id=obj.get('packageId'),
            command_id=obj.get('commandId'),
            created_at=to_datetime(obj['createdAt'])
        )
Пример #8
0
    def from_dict(doc, func_filepath):
        """Create a file handle instance from its dictionary representation.
        Requires a function that maps the file identifier to the actual file
        on disk.

        Parameters
        ----------
        doc: dict
            Dictionary representation of the file handle
        func_filepath: func
            Function that maps the file identifier to a file on disk.

        Returns
        -------
        FileHandle
        """
        # Deserialize all properties first and then turn the resulting list
        # into a key -> value dictionary.
        prop_list = [
            ObjectProperty.from_dict(entry) for entry in doc['properties']
        ]
        properties = {prop.key: prop.value for prop in prop_list}
        # Return new file handle
        file_id = doc['identifier']
        return FileHandle(
            file_id,
            doc['name'],
            func_filepath(doc['identifier']),
            to_datetime(doc['createdAt']),
            last_modified_at=to_datetime(doc['lastModifiedAt']),
            properties=properties,
            active=doc['active']
        )
Пример #9
0
    def from_dict(obj):
        """Build a workflow resource from the dictionary serialization that is
        returned by the vizier web service.

        Action, command, and creation timestamp are only set if the
        serialization contains an 'action' element. Modules, datasets, and
        links are None if the respective element is missing.

        Parameters
        ----------
        obj: dict
            Dictionary serialization of a workflow descriptor or handle

        Returns
        -------
        vizier.api.client.resources.workflow.WorkflowResource
        """
        action = command = created_at = None
        if 'action' in obj:
            action = to_external_form(obj['action'])
            created_at = to_datetime(obj['createdAt'])
            # Get the command name. A workflow without package identifier is
            # the result of creating a branch.
            package_id = obj['packageId']
            if package_id is None:
                command = 'Create Branch'
            else:
                # If several commands in the package share the identifier the
                # last one determines the command name. The command remains
                # None if there is no match.
                matches = [
                    cmd['name'] for cmd in PACKAGES[package_id]['command']
                    if cmd['id'] == obj['commandId']
                ]
                if matches:
                    command = matches[-1]
        if 'modules' in obj:
            modules = [ModuleResource.from_dict(doc) for doc in obj['modules']]
        else:
            modules = None
        if 'datasets' in obj:
            # Dataset descriptors are keyed by the dataset identifier
            datasets = {
                doc['id']: DatasetDescriptor.from_dict(doc)
                for doc in obj['datasets']
            }
        else:
            datasets = None
        if 'links' in obj:
            links = deserialize.HATEOAS(links=obj['links'])
        else:
            links = None
        return WorkflowResource(
            identifier=obj['id'],
            action=action,
            command=command,
            created_at=created_at,
            modules=modules,
            datasets=datasets,
            links=links
        )
Пример #10
0
    def get_workflow(self, branch_id=DEFAULT_BRANCH, version=-1):
        """Get the workflow with the given version number from the workflow
        history of the given branch.

        A negative version number refers to the workflow at the branch head.
        For a branch without workflows an empty workflow handle is returned
        if the version number is not positive.

        Returns None if the branch or the workflow version do not exist.

        Parameters
        ----------
        branch_id: string, optional
            Unique branch identifier
        version: int, optional
            Workflow version number

        Returns
        -------
        WorkflowHandle
        """
        # Return None if branch does not exist
        if branch_id not in self.branches:
            return None
        branch = self.branches[branch_id]
        if version <= 0 and len(branch.workflows) == 0:
            # Returns an empty workflow if the branch does not contain any
            # executed workflows yet.
            return WorkflowHandle(branch_id, -1, get_current_time(), [])
        wf_file = None
        if version < 0:
            # Get version number of branch HEAD if negative version is given.
            # The workflow list cannot be empty at this point since that case
            # is handled by the early return above.
            wf_file = workflow_file(self.fs_dir, branch.workflows[-1].version)
        else:
            for wf_desc in branch.workflows:
                if wf_desc.version == version:
                    wf_file = workflow_file(self.fs_dir, version)
                    break
        # Return None if version number is not in branch (indicated by an non-
        # existing workflow file)
        if wf_file is None:
            return None
        # Read the workflow file once and parse it as Json with a fallback to
        # Yaml. Catch Exception instead of using a bare except so that
        # KeyboardInterrupt and SystemExit are not swallowed by the fallback.
        with open(wf_file, 'r') as f:
            content = f.read()
        try:
            doc = load_json(content)
        except Exception:
            # NOTE(review): if CLoader is PyYAML's C implementation of the
            # full Loader it can construct arbitrary Python objects. Confirm
            # that workflow files are trusted or switch to a safe loader.
            doc = yaml.load(content, Loader=CLoader)
        return WorkflowHandle(
            branch_id, doc['version'], to_datetime(doc['createdAt']),
            [ModuleHandle.from_dict(m) for m in doc['modules']])
Пример #11
0
    def load_module(identifier,
                    module_path,
                    prev_state=None,
                    object_store=None):
        """Load module from given object store.

        If the object store raises a ValueError when reading the module object
        a module in error state is returned instead.

        Parameters
        ----------
        identifier: string
            Unique module identifier
        module_path: string
            Resource path for module object
        prev_state: dict(string: vizier.datastore.dataset.DatasetDescriptor)
            Dataset descriptors keyed by the user-provided name that exist in
            the database state of the previous module (in sequence of
            occurrence in the workflow)
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources

        Returns
        -------
        vizier.viztrail.objectstore.module.OSModuleHandle
        """
        # Fall back to the default object store if none is given
        store = DefaultObjectStore() if object_store is None else object_store
        # Reading the object may raise a ValueError to indicate that the
        # module does not exist (a system error condition). Return a
        # placeholder module in error state in that case.
        try:
            obj = store.read_object(object_path=module_path)
        except ValueError:
            return OSModuleHandle(
                identifier=identifier,
                command=ModuleCommand(
                    package_id=UNKNOWN_ID,
                    command_id=UNKNOWN_ID
                ),
                external_form='fatal error: object not found',
                module_path=module_path,
                state=mstate.MODULE_ERROR,
                object_store=store
            )
        # Module command
        cmd_doc = obj[KEY_COMMAND]
        command = ModuleCommand(
            package_id=cmd_doc[KEY_PACKAGE_ID],
            command_id=cmd_doc[KEY_COMMAND_ID],
            arguments=cmd_doc[KEY_ARGUMENTS]
        )
        # Module timestamps. Only created-at is mandatory.
        ts_doc = obj[KEY_TIMESTAMP]
        created_at = to_datetime(ts_doc[KEY_CREATED_AT])
        started_at = None
        if KEY_STARTED_AT in ts_doc:
            started_at = to_datetime(ts_doc[KEY_STARTED_AT])
        finished_at = None
        if KEY_FINISHED_AT in ts_doc:
            finished_at = to_datetime(ts_doc[KEY_FINISHED_AT])
        timestamp = ModuleTimestamp(
            created_at=created_at,
            started_at=started_at,
            finished_at=finished_at
        )
        # Module output streams
        out_doc = obj[KEY_OUTPUTS]
        outputs = ModuleOutputs(
            stdout=get_output_stream(out_doc[KEY_STDOUT]),
            stderr=get_output_stream(out_doc[KEY_STDERR])
        )
        # Module provenance information. Every element is optional and
        # defaults to None if it is missing.
        prov_doc = obj[KEY_PROVENANCE]
        read_prov = None
        if KEY_PROVENANCE_READ in prov_doc:
            read_prov = {
                entry[KEY_DATASET_NAME]: entry[KEY_DATASET_ID]
                for entry in prov_doc[KEY_PROVENANCE_READ]
            }
        write_prov = None
        if KEY_PROVENANCE_WRITE in prov_doc:
            write_prov = dict()
            for entry in prov_doc[KEY_PROVENANCE_WRITE]:
                columns = [
                    DatasetColumn(
                        identifier=col[KEY_COLUMN_ID],
                        name=col[KEY_COLUMN_NAME],
                        data_type=col[KEY_COLUMN_TYPE]
                    )
                    for col in entry[KEY_DATASET_COLUMNS]
                ]
                write_prov[entry[KEY_DATASET_NAME]] = DatasetDescriptor(
                    identifier=entry[KEY_DATASET_ID],
                    columns=columns,
                    row_count=entry[KEY_DATASET_ROWCOUNT]
                )
        delete_prov = None
        if KEY_PROVENANCE_DELETE in prov_doc:
            delete_prov = prov_doc[KEY_PROVENANCE_DELETE]
        res_prov = None
        if KEY_PROVENANCE_RESOURCES in prov_doc:
            res_prov = prov_doc[KEY_PROVENANCE_RESOURCES]
        charts_prov = None
        if KEY_PROVENANCE_CHARTS in prov_doc:
            charts_prov = [
                ChartViewHandle.from_dict(chart)
                for chart in prov_doc[KEY_PROVENANCE_CHARTS]
            ]
        provenance = ModuleProvenance(
            read=read_prov,
            write=write_prov,
            delete=delete_prov,
            resources=res_prov,
            charts=charts_prov
        )
        # The database state is derived from the previous state only for
        # modules in SUCCESS state. It is empty in all other cases.
        datasets = dict()
        if obj[KEY_STATE] == mstate.MODULE_SUCCESS and prev_state is not None:
            datasets = provenance.get_database_state(prev_state)
        # Return module handle
        return OSModuleHandle(
            identifier=identifier,
            command=command,
            external_form=obj[KEY_EXTERNAL_FORM],
            module_path=module_path,
            state=obj[KEY_STATE],
            timestamp=timestamp,
            datasets=datasets,
            outputs=outputs,
            provenance=provenance,
            object_store=store
        )
Пример #12
0
    def load_branch(
            identifier: str, 
            is_default: bool, 
            base_path: str, 
            modules_folder: str, 
            object_store: Optional[ObjectStore] = None
        ):
        """Load a branch from the object store. Reads the branch provenance
        and the descriptors for all workflows in the branch history. For a
        non-empty history the workflow at the branch head is read as well.

        Parameters
        ----------
        identifier: string
            Unique branch identifier
        is_default: bool
            True if this is the default branch for its viztrail
        base_path: string
            Path to folder containing branch resources
        modules_folder: string
            Path to folder containing workflow modules
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources

        Returns
        -------
        vizier.viztrail.objectstore.branch.OSBranchHandle
        """
        # Fall back to the default object store if none is given
        store = DefaultObjectStore() if object_store is None else object_store
        # The metadata object contains the created_at timestamp and optionally
        # three entries that define the branch point. The branch point is
        # recognized by the total number of entries in the object.
        doc = cast(
            Dict[str, Any],
            store.read_object(store.join(base_path, OBJ_METADATA))
        )
        created_at = to_datetime(doc[KEY_CREATED_AT])
        if len(doc) != 4:
            provenance = BranchProvenance(created_at=created_at)
        else:
            provenance = BranchProvenance(
                source_branch=doc[KEY_SOURCE_BRANCH],
                workflow_id=doc[KEY_WORKFLOW_ID],
                module_id=doc[KEY_MODULE_ID],
                created_at=created_at
            )
        # Every object in the base folder that is not one of the predefined
        # branch objects is treated as a workflow object.
        reserved_names = (OBJ_METADATA, OBJ_PROPERTIES)
        workflows = list()
        for resource in store.list_objects(base_path):
            if resource in reserved_names:
                continue
            wf_doc = cast(
                Dict[str, Any],
                store.read_object(store.join(base_path, resource))
            )
            wf_desc = wf_doc[KEY_WORKFLOW_DESCRIPTOR]
            workflows.append(
                WorkflowDescriptor(
                    identifier=wf_doc[KEY_WORKFLOW_ID],
                    action=wf_desc[KEY_ACTION],
                    package_id=wf_desc[KEY_PACKAGE_ID],
                    command_id=wf_desc[KEY_COMMAND_ID],
                    created_at=to_datetime(wf_desc[KEY_CREATED_AT])
                )
            )
        # Sort workflows in ascending order of their identifier. The last
        # element is then used as the branch head.
        workflows.sort(key=lambda wf: wf.identifier)
        head = None
        if workflows:
            head_descriptor = workflows[-1]
            head = read_workflow(
                branch_id=identifier,
                workflow_descriptor=head_descriptor,
                workflow_path=store.join(base_path, head_descriptor.identifier),
                modules_folder=modules_folder,
                object_store=store
            )
        properties = PersistentAnnotationSet(
            object_path=store.join(base_path, OBJ_PROPERTIES),
            object_store=store
        )
        return OSBranchHandle(
            identifier=identifier,
            is_default=is_default,
            base_path=base_path,
            modules_folder=modules_folder,
            provenance=provenance,
            properties=properties,
            workflows=workflows,
            head=head,
            object_store=store
        )
Пример #13
0
    def update_task_state(self, task_id, state, body):
        """Update the state of a given task. The contents of the request body
        depend on the value of the new task state.

        Raises a ValueError if the requested state is neither running, error,
        nor success. The result is None if the engine returns None for the
        state change. Otherwise, the result is a dictionary with a single
        element that contains the value returned by the engine.

        Parameters
        ----------
        task_id: string
            Unique task identifier
        state: int
            The new state of the task
        body: dict
            State-dependent additional information

        Returns
        -------
        dict
        """
        if state == states.MODULE_RUNNING:
            # The started-at timestamp is optional. Only pass it on to the
            # engine if it is present in the request body.
            running_args = {'task_id': task_id}
            if labels.STARTED_AT in body:
                running_args['started_at'] = to_datetime(
                    body[labels.STARTED_AT]
                )
            result = self.engine.set_running(**running_args)
        elif state in (states.MODULE_ERROR, states.MODULE_SUCCESS):
            # Both terminal states accept an optional finished-at timestamp
            # and optional module outputs.
            if labels.FINISHED_AT in body:
                finished_at = to_datetime(body[labels.FINISHED_AT])
            else:
                finished_at = None
            if labels.OUTPUTS in body:
                outputs = deserialize.OUTPUTS(body[labels.OUTPUTS])
            else:
                outputs = None
            if state == states.MODULE_ERROR:
                result = self.engine.set_error(
                    task_id=task_id,
                    finished_at=finished_at,
                    outputs=outputs
                )
            else:
                # Provenance information is only used for successful tasks
                if labels.PROVENANCE in body:
                    provenance = deserialize.PROVENANCE(
                        body[labels.PROVENANCE]
                    )
                else:
                    provenance = None
                result = self.engine.set_success(
                    task_id=task_id,
                    finished_at=finished_at,
                    outputs=outputs,
                    provenance=provenance
                )
        else:
            raise ValueError('invalid state change')
        # Create state change result
        return None if result is None else {labels.RESULT: result}
Пример #14
0
    def load_module(
            identifier: str, 
            module_path: str, 
            prev_state: Optional[Dict[str, ArtifactDescriptor]] = None, 
            object_store: Optional[ObjectStore] = None
        ) -> "OSModuleHandle":
        """Load module from given object store.

        If the object store raises a ValueError when reading the module object
        a module in error state is returned instead.

        Parameters
        ----------
        identifier: string
            Unique module identifier
        module_path: string
            Resource path for module object
        prev_state: dict(string: vizier.datastore.dataset.DatasetDescriptor)
            Dataset descriptors keyed by the user-provided name that exist in
            the database state of the previous module (in sequence of
            occurrence in the workflow). Not used by this implementation.
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources. The
            default object store is used if the argument is None.

        Returns
        -------
        vizier.viztrail.objectstore.module.OSModuleHandle
        """
        # Make sure the object store is not None. The default store is created
        # at call time (not once at function definition time) and an explicit
        # None argument is handled like a missing argument.
        if object_store is None:
            object_store = DefaultObjectStore()
        # Read object from store. This may raise a ValueError to indicate that
        # the module does not exist (in a system error condition). In this
        # case we return a new module that is in error state.
        try:
            obj = cast(Dict[str, Any], object_store.read_object(object_path=module_path))
        except ValueError:
            return OSModuleHandle(
                identifier=identifier,
                command=ModuleCommand(
                    package_id=UNKNOWN_ID,
                    command_id=UNKNOWN_ID,
                    arguments=list(),
                    packages=None
                ),
                external_form='fatal error: object not found',
                module_path=module_path,
                state=mstate.MODULE_ERROR,
                object_store=object_store
            )
        # Create module command
        command = ModuleCommand(
            package_id=obj[KEY_COMMAND][KEY_PACKAGE_ID],
            command_id=obj[KEY_COMMAND][KEY_COMMAND_ID],
            arguments=obj[KEY_COMMAND][KEY_ARGUMENTS],
            packages=None
        )
        # Create module timestamps. Only created-at is mandatory.
        created_at = to_datetime(obj[KEY_TIMESTAMP][KEY_CREATED_AT])
        if KEY_STARTED_AT in obj[KEY_TIMESTAMP]:
            started_at: Optional[datetime] = to_datetime(obj[KEY_TIMESTAMP][KEY_STARTED_AT])
        else:
            started_at = None
        if KEY_FINISHED_AT in obj[KEY_TIMESTAMP]:
            finished_at: Optional[datetime] = to_datetime(obj[KEY_TIMESTAMP][KEY_FINISHED_AT])
        else:
            finished_at = None
        timestamp = ModuleTimestamp(
            created_at=created_at,
            started_at=started_at,
            finished_at=finished_at
        )
        # Create module output streams.
        outputs = ModuleOutputs(
            stdout=get_output_stream(obj[KEY_OUTPUTS][KEY_STDOUT]),
            stderr=get_output_stream(obj[KEY_OUTPUTS][KEY_STDERR])
        )
        # Create module provenance information
        read_prov = None
        if KEY_PROVENANCE_READ in obj[KEY_PROVENANCE]:
            read_prov = dict()
            for ds in obj[KEY_PROVENANCE][KEY_PROVENANCE_READ]:
                read_prov[ds[KEY_DATASET_NAME]] = ds[KEY_DATASET_ID]
        write_prov = None
        if KEY_PROVENANCE_WRITE in obj[KEY_PROVENANCE]:
            write_prov = dict()
            for ds in obj[KEY_PROVENANCE][KEY_PROVENANCE_WRITE]:
                # Entries with a data object type are generic artifacts. All
                # other entries are datasets with a list of columns.
                if KEY_DATAOBJECT_TYPE in ds:
                    descriptor = ArtifactDescriptor(
                        identifier=ds[KEY_DATAOBJECT_ID],
                        name=ds[KEY_DATAOBJECT_NAME],
                        artifact_type=ds[KEY_DATAOBJECT_TYPE])
                else: 
                    descriptor = DatasetDescriptor(
                        identifier=ds[KEY_DATASET_ID],
                        name=ds[KEY_DATASET_NAME],
                        columns=[
                            DatasetColumn(
                                identifier=col[KEY_COLUMN_ID],
                                name=col[KEY_COLUMN_NAME],
                                data_type=col[KEY_COLUMN_TYPE]
                            ) for col in ds[KEY_DATASET_COLUMNS]
                        ]
                    )
                # NOTE(review): artifacts are also keyed by KEY_DATASET_NAME
                # here - presumably the same label as KEY_DATAOBJECT_NAME;
                # confirm against the key definitions.
                write_prov[ds[KEY_DATASET_NAME]] = descriptor
        # Deleted names, resources, and charts default to empty collections
        if KEY_PROVENANCE_DELETE in obj[KEY_PROVENANCE]:
            delete_prov = set(obj[KEY_PROVENANCE][KEY_PROVENANCE_DELETE])
        else:
            delete_prov = set()
        if KEY_PROVENANCE_RESOURCES in obj[KEY_PROVENANCE]:
            res_prov = cast(Dict[str, Any], obj[KEY_PROVENANCE][KEY_PROVENANCE_RESOURCES])
        else:
            res_prov = dict()
        if KEY_PROVENANCE_CHARTS in obj[KEY_PROVENANCE]:
            # Charts are serialized either as [name, chart] lists or as plain
            # chart objects. Plain chart objects get the name "Chart".
            charts_prov = [
                ( 
                    c[0], 
                    ChartViewHandle.from_dict(c[1])  # type: ignore[no-untyped-call]
                ) if isinstance(c, list) else 
                (
                    "Chart",
                    ChartViewHandle.from_dict(c)
                )
                for c in obj[KEY_PROVENANCE][KEY_PROVENANCE_CHARTS]
            ]
        else:
            charts_prov = list()
        provenance = ModuleProvenance(
            read=read_prov,
            write=write_prov,
            delete=delete_prov,
            resources=res_prov,
            charts=charts_prov
        )
        # Return module handle
        return OSModuleHandle(
            identifier=identifier,
            command=command,
            external_form=obj[KEY_EXTERNAL_FORM],
            module_path=module_path,
            state=obj[KEY_STATE],
            timestamp=timestamp,
            outputs=outputs,
            provenance=provenance,
            object_store=object_store,
        )
Пример #15
0
    def load_viztrail(
        base_path: str,
        object_store: Optional[ObjectStore] = None
    ) -> Optional["OSViztrailHandle"]:
        """Load all viztrail resources from given object store.

        Returns None if the object store returns None for the viztrail
        metadata object.

        Parameters
        ----------
        base_path: string
            Identifier for folder containing viztrail resources
        object_store: vizier.core.io.base.ObjectStore, optional
            Object store implementation to access and maintain resources

        Returns
        -------
        vizier.viztrail.driver.os.viztrail.OSViztrailHandle
        """
        # Fall back to the default object store if none is given
        store: ObjectStore = (
            DefaultObjectStore() if object_store is None else object_store
        )
        # Load viztrail metadata
        raw_metadata = store.read_object(store.join(base_path, OBJ_METADATA))
        if raw_metadata is None:
            return None
        metadata = cast(Dict[str, Any], raw_metadata)
        identifier = metadata[KEY_IDENTIFIER]
        created_at = to_datetime(metadata[KEY_CREATED_AT])
        # Resource locations inside the viztrail folder
        branch_folder = store.join(base_path, FOLDER_BRANCHES)
        branch_index = store.join(branch_folder, OBJ_BRANCHINDEX)
        modules_folder = store.join(base_path, FOLDER_MODULES)
        # Load all branches that are listed in the branch index. If more than
        # one entry is flagged as default the last one wins.
        branches = list()
        default_branch: Optional[BranchHandle] = None
        index_entries = cast(
            List[Dict[str, Any]],
            store.read_object(branch_index)
        )
        for entry in index_entries:
            branch_id = entry[KEY_IDENTIFIER]
            is_default = entry[KEY_DEFAULT]
            branch = OSBranchHandle.load_branch(
                identifier=branch_id,
                is_default=is_default,
                base_path=store.join(branch_folder, branch_id),
                modules_folder=modules_folder,
                object_store=store
            )
            branches.append(branch)
            if is_default:
                default_branch = branch
        # Return handle for the loaded viztrail
        properties = PersistentAnnotationSet(
            object_path=store.join(base_path, OBJ_PROPERTIES),
            object_store=store
        )
        return OSViztrailHandle(
            identifier=identifier,
            properties=properties,
            branches=branches,
            default_branch=default_branch,
            created_at=created_at,
            base_path=base_path,
            object_store=store,
            branch_index=branch_index,
            branch_folder=branch_folder,
            modules_folder=modules_folder
        )