示例#1
0
def get_url_factory(
        config: AppConfig,
        projects: ProjectCache
    ) -> UrlFactory:
    """Get the url factory for a given configuration. In most cases we use the
    default url factory. Only for the configuration where each project is
    running in a separate container we need a different factory.

    Parameters
    ----------
    config: vizier.config.app.AppConfig
        Application configuration object
    projects: vizier.engine.project.cache.base.ProjectCache
        Cache for projects (only used for container engine)

    Returns
    -------
    vizier.api.routes.base.UrlFactory
    """
    if config.engine.identifier == base.CONTAINER_ENGINE:
        # Container engine: urls must be resolved per project container, so
        # the factory needs access to the project cache.
        return ContainerEngineUrlFactory(
            base_url=config.app_base_url,
            api_doc_url=config.webservice.doc_url,
            projects=cast(ContainerProjectCache, projects)
        )
    else:
        return UrlFactory(
            base_url=config.app_base_url,
            api_doc_url=config.webservice.doc_url
        )
示例#2
0
def main(args):
    """Read user input from stdin until either quit, exit or CTRL-D is entered.
    """
    # Resolve the application directory and the configuration file location.
    base_dir = get_base_directory()
    config_path = os.path.join(base_dir, CONFIG_FILE)
    is_init = len(args) in [1, 2] and args[0] == 'init'
    if is_init:
        if len(args) == 2:
            # An explicit API url was given on the command line.
            url = args[1]
        elif not os.path.isfile(config_path):
            # No prior configuration exists: fall back to the default url of a
            # locally running web service.
            url = 'http://localhost:5000/vizier-db/api/v1'
        else:
            # Client is already initialized: report the API url that is stored
            # in the configuration file and exit.
            settings = read_object_from_file(config_path)
            print_header()
            print('\nConnected to API at ' + settings['url'])
            return
        # Persist the chosen url, creating the app directory if necessary.
        if not os.path.isdir(base_dir):
            os.makedirs(base_dir)
        with open(config_path, 'w') as fh:
            json.dump({'url': url}, fh)
    elif not os.path.isfile(config_path):
        raise ValueError('vizier client is not initialized')
    else:
        # Normal operation: evaluate the given command against the API that
        # the saved configuration points at.
        settings = read_object_from_file(config_path)
        defaults = PersistentAnnotationSet(
            object_path=os.path.join(base_dir, 'defaults.json'))
        CommandInterpreter(
            urls=UrlFactory(base_url=settings['url']),
            defaults=defaults
        ).eval(args)
示例#3
0
 def test_init_url_factory(self):
     """Test initializing the main url factory."""
     # Trailing slashes on the base url are stripped on construction.
     factory = UrlFactory(base_url='http://abc.com/////')
     self.assertEqual(factory.base_url, 'http://abc.com')
     self.assertIsNone(factory.api_doc_url)
     # Explicit api_doc_url argument is stored as given.
     factory = UrlFactory(base_url='http://abc.com/////', api_doc_url='ABC')
     self.assertEqual(factory.base_url, 'http://abc.com')
     self.assertEqual(factory.api_doc_url, 'ABC')
     # A doc url in the properties dictionary wins over the argument.
     factory = UrlFactory(base_url='http://abc.com/////',
                          api_doc_url='ABC',
                          properties={PROPERTIES_APIDOCURL: 'XYZ'})
     self.assertEqual(factory.base_url, 'http://abc.com')
     self.assertEqual(factory.api_doc_url, 'XYZ')
     # A base url in the properties dictionary wins over the argument.
     factory = UrlFactory(base_url='http://abc.com/////',
                          api_doc_url='ABC',
                          properties={PROPERTIES_BASEURL: 'XYZ'})
     self.assertEqual(factory.base_url, 'XYZ')
     self.assertEqual(factory.api_doc_url, 'ABC')
     # Both urls may be supplied via properties alone.
     factory = UrlFactory(properties={
         PROPERTIES_BASEURL: 'XYZ',
         PROPERTIES_APIDOCURL: 'ABC'
     })
     self.assertEqual(factory.base_url, 'XYZ')
     self.assertEqual(factory.api_doc_url, 'ABC')
     # Missing base url (argument and properties) raises a ValueError.
     with self.assertRaises(ValueError):
         factory = UrlFactory(api_doc_url='ABC',
                              properties={PROPERTIES_APIDOCURL: 'XYZ'})
示例#4
0
    def get_datastore(self, identifier):
        """Get the datastore client for the project with the given identifier.

        Parameters
        ----------
        identifier: string
            Unique identifier for datastore

        Returns
        -------
        vizier.api.client.datastore.base.DatastoreClient
        """
        # Each project has its own datastore; the url factory is scoped to the
        # project via its identifier.
        return DatastoreClient(
            urls=DatastoreClientUrlFactory(
                urls=UrlFactory(base_url=self.webservice_url),
                project_id=identifier
            )
        )
示例#5
0
def WORKFLOW_HANDLE_LINKS(urls: UrlFactory,
                          project_id: str,
                          branch_id: str,
                          workflow_id: Optional[str] = None,
                          links: Optional[Dict[str, Optional[str]]] = None):
    """Get basic set of HATEOAS references for workflow handles.

    For an empty workflow the identifier is None. In that case the result will
    not contain a self reference.

    Parameters
    ----------
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls
    project_id: string
        Unique project identifier
    branch_id: string
        Unique branch identifier
    workflow_id: string, optional
        Unique workflow identifier
    links: dict, optional
        Optional dictionary of references that the standard set is added to

    Returns
    -------
    dict
    """
    if links is None:
        links = dict()
    links[ref.WORKFLOW_APPEND] = urls.workflow_module_append(
        project_id=project_id, branch_id=branch_id)
    # References to the workflow branch
    links[ref.WORKFLOW_BRANCH] = urls.get_branch(project_id=project_id,
                                                 branch_id=branch_id)
    links[ref.BRANCH_HEAD] = urls.get_branch_head(project_id=project_id,
                                                  branch_id=branch_id)
    links[ref.WORKFLOW_PROJECT] = urls.get_project(project_id)
    links[ref.FILE_UPLOAD] = urls.upload_file(project_id)
    # Only include self reference if workflow identifier is given
    if workflow_id is not None:
        links[ref.SELF] = urls.get_workflow(project_id=project_id,
                                            branch_id=branch_id,
                                            workflow_id=workflow_id)
    return serialize.HATEOAS(links)
"""Test the vizier client datastore. This requires the web service API to run
and a project with datastores to have been setup.
"""

from vizier.api.client.base import VizierApiClient
from vizier.api.client.datastore.base import DatastoreClient
from vizier.api.routes.base import UrlFactory
from vizier.api.routes.datastore import DatastoreClientUrlFactory
from vizier.datastore.dataset import DatasetColumn, DatasetRow

from atexit import register as at_exit

# Url factory pointing at a locally running web service API instance.
URLS = UrlFactory(base_url='http://localhost:5000/vizier-db/api/v1')

# Create a throw-away project for this test run.
api = VizierApiClient(URLS)
PROJECT_ID = api.create_project({"name": "Test Client Datastore"}).identifier

# Ensure the test project is deleted again when the script exits.
at_exit(api.delete_project, PROJECT_ID)

# We're just doing some unit testing on the fields specific to DatastoreClient, so
# ignore complaints about instantiating an abstract class
store = DatastoreClient(  # type: ignore[abstract]
    urls=DatastoreClientUrlFactory(urls=URLS, project_id=PROJECT_ID))

ds = store.create_dataset(columns=[
    DatasetColumn(identifier=0, name='Name'),
    DatasetColumn(identifier=1, name='Age', data_type="int")
],
                          rows=[
                              DatasetRow(identifier=0, values=['Alice', 32]),
                              DatasetRow(identifier=1, values=['Bob', 23])
示例#7
0
def MODULE_HANDLE(project: "ProjectHandle",
                  branch: BranchHandle,
                  module: ModuleHandle,
                  urls: UrlFactory,
                  workflow: Optional[WorkflowHandle] = None,
                  charts: Optional[List[ChartViewHandle]] = None,
                  include_self: bool = True) -> Dict[str, Any]:
    """Dictionary serialization for a handle in the workflow at the branch
    head.

    The list of references will only contain a self reference if the
    include_self flag is True.

    Parameters
    ----------
    project: vizier.engine.project.base.ProjectHandle
        Handle for the containing project
    branch : vizier.viztrail.branch.BranchHandle
        Branch handle
    module: vizier.viztrail.module.base.ModuleHandle
        Module handle
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls
    workflow: vizier.viztrail.workflow.WorkflowHandle, optional
        Workflow handle (defaults to the workflow at the branch head)
    charts: list(vizier.view.chart.ChartViewHandle), optional
        List of handles for available chart views
    include_self: bool, optional
        Indicate if self link is included

    Returns
    -------
    dict
    """
    project_id = project.identifier
    branch_id = branch.identifier
    module_id = module.identifier
    cmd = module.command
    timestamp = module.timestamp
    # Fall back to the workflow at the branch head if none was given.
    actual_workflow = branch.get_head() if workflow is None else workflow
    obj: Dict[str, Any] = {
        labels.ID: module_id,
        'state': module.state,
        labels.COMMAND: {
            labels.COMMAND_PACKAGE: cmd.package_id,
            labels.COMMAND_ID: cmd.command_id,
            labels.COMMAND_ARGS: cmd.arguments.to_list()
        },
        'text': module.external_form,
        labels.TIMESTAMPS: {
            labels.CREATED_AT: timestamp.created_at.isoformat()
        },
        # Modules without an identifier cannot be referenced by url.
        labels.LINKS: serialize.HATEOAS({} if module_id is None else {
            ref.MODULE_INSERT: urls.workflow_module_insert(
                project_id=project_id,
                branch_id=branch_id,
                module_id=module_id)
        })
    }
    if include_self:
        obj[labels.LINKS].extend(
            serialize.HATEOAS({} if module_id is None else {
                ref.SELF: urls.get_workflow_module(
                    project_id=project_id,
                    branch_id=branch_id,
                    module_id=module_id),
                ref.MODULE_DELETE: urls.workflow_module_delete(
                    project_id=project_id,
                    branch_id=branch_id,
                    module_id=module_id),
                ref.MODULE_REPLACE: urls.workflow_module_replace(
                    project_id=project_id,
                    branch_id=branch_id,
                    module_id=module_id)
            }))
    if timestamp.started_at is not None:
        obj[labels.TIMESTAMPS][
            labels.STARTED_AT] = timestamp.started_at.isoformat()
    # Add outputs and datasets if module is not active.
    if not module.is_active:
        # Replay the provenance of all modules up to and including this one to
        # compute the database state that is visible at this module.
        artifacts: Dict[str, ArtifactDescriptor] = dict()
        for precursor in actual_workflow.modules:
            artifacts = precursor.provenance.get_database_state(artifacts)
            if precursor == module:
                break
        datasets = list()
        other_artifacts = list()
        for artifact_name in artifacts:
            artifact = artifacts[artifact_name]
            if artifact.is_dataset:
                datasets.append(
                    serialds.DATASET_IDENTIFIER(identifier=artifact.identifier,
                                                name=artifact_name))
            else:
                other_artifacts.append(
                    serialds.ARTIFACT_DESCRIPTOR(artifact=artifact,
                                                 project=project_id))
        available_charts = list()
        if charts is not None:
            for c_handle in charts:
                available_charts.append({
                    labels.NAME: c_handle.chart_name,
                    labels.LINKS: serialize.HATEOAS(
                        {} if module_id is None else {
                            ref.SELF: urls.get_chart_view(
                                project_id=project_id,
                                branch_id=branch_id,
                                workflow_id=actual_workflow.identifier,
                                module_id=module_id,
                                chart_id=c_handle.identifier)
                        })
                })
        obj[labels.DATASETS] = datasets
        obj[labels.CHARTS] = available_charts
        obj[labels.OUTPUTS] = serialize.OUTPUTS(module.outputs)
        obj[labels.ARTIFACTS] = other_artifacts
        if timestamp.finished_at is not None:
            obj[labels.TIMESTAMPS][
                labels.FINISHED_AT] = timestamp.finished_at.isoformat()
    else:
        # Add empty lists for outputs, datasets and charts if the module is
        # active
        obj[labels.DATASETS] = list()
        obj[labels.CHARTS] = list()
        obj[labels.OUTPUTS] = serialize.OUTPUTS(ModuleOutputs())
        obj[labels.ARTIFACTS] = list()
    return obj
示例#8
0
def WORKFLOW_HANDLE(project: "ProjectHandle", branch: "BranchHandle",
                    workflow: "WorkflowHandle",
                    urls: UrlFactory) -> Dict[str, Any]:
    """Dictionary serialization for a workflow handle.

    Parameters
    ----------
    project: vizier.engine.project.base.ProjectHandle
        Handle for the containing project
    branch : vizier.viztrail.branch.BranchHandle
        Branch handle
    workflow: vizier.viztrail.workflow.WorkflowHandle
        Workflow handle
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    project_id = project.identifier
    branch_id = branch.identifier
    workflow_id = workflow.identifier
    descriptor = workflow.descriptor
    # Only the workflow at the branch head may be modified.
    read_only = (branch.get_head().identifier != workflow_id)
    # Create lists of module handles and dataset handles
    modules = list()
    datasets = dict()
    dataset_names = list()
    dataobjects = dict()
    # Charts accumulate across modules: a chart declared by an earlier module
    # remains visible to later ones (until redefined under the same name).
    charts = dict()
    for m in workflow.modules:
        if m.provenance.charts is not None:
            for chart_name, chart in m.provenance.charts:
                charts[chart_name] = chart
        available_charts = list()
        for artifact in m.artifacts:
            if artifact.is_dataset:
                datasets[artifact.identifier] = serialds.DATASET_DESCRIPTOR(
                    dataset=artifact, project=project, urls=urls)
                dataset_names.append(artifact.name)
            else:
                dataobjects[
                    artifact.identifier] = serialds.ARTIFACT_DESCRIPTOR(
                        artifact=artifact, project=project, urls=urls)
        # Only include charts for modules that completed successfully and
        # whose referenced dataset is actually present.
        if m.is_success:
            for c_handle in list(charts.values()):
                if c_handle.dataset_name in dataset_names:
                    available_charts.append(c_handle)
        modules.append(
            serialmd.MODULE_HANDLE(project=project,
                                   branch=branch,
                                   workflow=workflow,
                                   module=m,
                                   charts=available_charts,
                                   urls=urls,
                                   include_self=(not read_only)))
    handle_links: Optional[Dict[str, Optional[str]]] = None
    if workflow.is_active:
        # An active workflow can be canceled.
        handle_links = {
            ref.WORKFLOW_CANCEL:
            urls.cancel_workflow(project_id=project_id, branch_id=branch_id)
        }
    links = WORKFLOW_HANDLE_LINKS(project_id=project_id,
                                  branch_id=branch_id,
                                  workflow_id=workflow_id,
                                  urls=urls,
                                  links=handle_links)
    return {
        'id': workflow_id,
        'createdAt': descriptor.created_at.isoformat(),
        'action': descriptor.action,
        labels.COMMAND_PACKAGE: descriptor.package_id,
        labels.COMMAND_ID: descriptor.command_id,
        'state': workflow.get_state().state,
        'modules': modules,
        'datasets': list(datasets.values()),
        'dataobjects': list(dataobjects.values()),
        'readOnly': read_only,
        labels.LINKS: links
    }
示例#9
0
def DATASET_HANDLE(
        project: ProjectHandle,
        dataset: DatasetHandle,
        rows: List[DatasetRow],
        defaults: Any,  # ConfigObject uses type hacking... pretend it's an any
        urls: UrlFactory,
        offset: int = 0,
        limit: int = -1):
    """Dictionary serialization for dataset handle. Includes (part of) the
    dataset rows.

    Parameters
    ----------
    project: vizier.engine.project.base.ProjectHandle
        Handle for project containing the dataset
    dataset : vizier.datastore.dataset.DatasetHandle
        Dataset handle
    rows: list(vizier.datastore.dataset.DatasetRow)
        List of rows from the dataset
    defaults : vizier.config.base.ConfigObject
        Web service default values
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls
    offset: int, optional
        Number of rows at the beginning of the list that are skipped.
    limit: int, optional
        Limits the number of rows that are returned.

    Returns
    -------
    dict
    """
    # Use the dataset descriptor as the base
    obj = DATASET_DESCRIPTOR(dataset=dataset, project=project, urls=urls)
    # Serialize rows. The default dictionary representation for a row does
    # not include the row index position nor the annotation information.
    serialized_rows = list()
    for row in rows:
        serialized_rows.append(DATASET_ROW(row))
    # Serialize the dataset schema and cells
    obj[labels.ROWS] = serialized_rows
    obj[labels.ROWCOUNT] = dataset.row_count
    obj[labels.OFFSET] = offset
    obj[labels.PROPERTIES] = dataset.get_properties()
    # Add pagination references
    links = obj[labels.LINKS]
    # Max. number of records shown: explicit limit wins, then the service
    # default row limit, then the hard maximum; -1 disables pagination.
    if limit is not None and int(limit) >= 0:
        max_rows_per_request = int(limit)
    elif defaults.row_limit >= 0:
        max_rows_per_request = defaults.row_limit
    elif defaults.max_row_limit >= 0:
        max_rows_per_request = defaults.max_row_limit
    else:
        max_rows_per_request = -1
    # List of pagination Urls
    # FIRST: Always include Url's to access the first page
    project_id = project.identifier
    dataset_id = dataset.identifier
    links.extend(
        serialize.HATEOAS({
            ref.PAGE_FIRST:
            urls.dataset_pagination(project_id=project_id,
                                    dataset_id=dataset_id,
                                    offset=offset,
                                    limit=limit)
        }))
    # PREV: If offset is greater than zero allow to fetch previous page
    if offset is not None and offset > 0 and max_rows_per_request >= 0:
        prev_offset = max(offset - max_rows_per_request, 0)
        links.extend(
            serialize.HATEOAS({
                ref.PAGE_PREV:
                urls.dataset_pagination(project_id=project_id,
                                        dataset_id=dataset_id,
                                        offset=prev_offset,
                                        limit=limit)
            }))
    # NEXT & LAST: If there are rows beyond the current offset+limit include
    # Url's to fetch next page and last page.
    if offset < dataset.row_count and max_rows_per_request >= 0:
        next_offset = offset + max_rows_per_request
        if next_offset < dataset.row_count:
            links.extend(
                serialize.HATEOAS({
                    ref.PAGE_NEXT:
                    urls.dataset_pagination(project_id=project_id,
                                            dataset_id=dataset_id,
                                            offset=next_offset,
                                            limit=limit)
                }))
        last_offset = (dataset.row_count - max_rows_per_request)
        if last_offset > offset:
            links.extend(
                serialize.HATEOAS({
                    ref.PAGE_LAST:
                    urls.dataset_pagination(project_id=project_id,
                                            dataset_id=dataset_id,
                                            offset=last_offset,
                                            limit=limit)
                }))
    # Return pagination Url list
    return obj