def BRANCH_LISTING(viztrail, urls):
    """Serialize the listing of all branches of a viztrail.

    Parameters
    ----------
    viztrail : vizier.workflow.base.ViztrailHandle
        Viztrail handle whose branches are listed
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    project_id = viztrail.identifier
    listing_url = urls.branches_url(project_id)
    # One descriptor per branch, in the iteration order of viztrail.branches.
    descriptors = list()
    for branch_key in viztrail.branches:
        descriptors.append(
            BRANCH_DESCRIPTOR(viztrail, viztrail.branches[branch_key], urls)
        )
    # The listing url is used for the self reference as well as for the
    # REL_CREATE reference.
    links = [
        self_reference(listing_url),
        reference(hateoas.REL_CREATE, listing_url),
        reference(hateoas.REL_PROJECT, urls.project_url(project_id))
    ]
    return {'branches': descriptors, JSON_REFERENCES: links}
def FILE_HANDLE(f_handle, urls):
    """Serialize the handle of a file server resource as a dictionary.

    Parameters
    ----------
    f_handle : database.fileserver.FileHandle
        Handle for file server resource
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    file_id = f_handle.identifier
    resource_url = urls.file_url(file_id)
    # File metadata. Timestamps are emitted in ISO format.
    obj = {
        'id': file_id,
        'name': f_handle.name,
        'columns': f_handle.columns,
        'rows': f_handle.rows,
        'filesize': f_handle.filesize,
        'createdAt': f_handle.created_at.isoformat(),
        'lastModifiedAt': f_handle.last_modified_at.isoformat()
    }
    # Delete and rename share the url of the file resource itself. Only the
    # download reference points to a separate url.
    obj[JSON_REFERENCES] = [
        self_reference(resource_url),
        reference(hateoas.REL_DELETE, resource_url),
        reference(hateoas.REL_RENAME, resource_url),
        reference(hateoas.REL_DOWNLOAD, urls.file_download_url(file_id))
    ]
    return obj
def DATASET(dataset, rows, config, urls, offset=0, limit=-1):
    """Serialize (a page of) the state of a dataset as a dictionary.

    Parameters
    ----------
    dataset : vizier.datastore.base.DatasetHandle
        Handle for dataset
    rows: list()
        List of rows from the dataset
    config : vizier.config.AppConfig
        Application configuration parameters
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls
    offset: int, optional
        Number of rows at the beginning of the list that are skipped.
    limit: int, optional
        Limits the number of rows that are returned.

    Returns
    -------
    dict
    """
    dataset_id = dataset.identifier
    # The dictionary returned by row.to_dict() is extended with the index
    # position of the row, i.e., its position in the given list shifted by the
    # offset. Annotated cells are collected as (column id, row id) pairs.
    row_objects = list()
    annotated = list()
    for position, row in enumerate(rows):
        serialized = row.to_dict()
        serialized['index'] = position + offset
        row_objects.append(serialized)
        for col_idx, column in enumerate(dataset.columns):
            # Explicit comparison is kept on purpose: only flags that are
            # equal to True mark a cell as annotated.
            if row.cell_annotations[col_idx] == True:
                annotated.append({
                    'column': column.identifier,
                    'row': row.identifier
                })
    # Dataset schema, the serialized rows, and the annotated cells
    result = {
        'id': dataset_id,
        'columns': [column.to_dict() for column in dataset.columns],
        'rows': row_objects,
        'offset': offset,
        'rowcount': dataset.row_count,
        'annotatedCells': annotated
    }
    # Resource references followed by the pagination references for the
    # requested offset and limit.
    links = [
        self_reference(urls.dataset_url(dataset_id)),
        reference(
            hateoas.REL_DOWNLOAD,
            urls.dataset_download_url(dataset_id)
        ),
        reference(
            hateoas.REL_ANNOTATIONS,
            urls.dataset_annotations_url(dataset_id)
        )
    ]
    pagination = DATASET_PAGINATION_URLS(
        dataset,
        config,
        urls,
        offset=offset,
        limit=limit
    )
    result[JSON_REFERENCES] = links + pagination
    return result
def WORKFLOW_MODULES(viztrail, workflow, config, urls, dataset_cache, read_only=False):
    """Dictionary representation for the list of modules in a workflow.

    Parameters
    ----------
    viztrail : vizier.workflow.base.ViztrailHandle
        Viztrail handle
    workflow : vizier.workflow.base.WorkflowHandle
        Workflow handle
    config : vizier.config.AppConfig
        Application configuration parameters
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls
    dataset_cache: func
        Function to get dataset handle for given identifier
    read_only: bool, optional
        Value for the read only flag in the workflow serialization

    Returns
    -------
    dict
    """
    vt_id = viztrail.identifier
    branch = viztrail.branches[workflow.branch_id]
    branch_id = branch.identifier
    # Workflow header: version information plus the descriptors of the
    # project and branch that the workflow belongs to.
    header = {
        'version': workflow.version,
        'createdAt': workflow.created_at.isoformat(),
        'project': PROJECT_DESCRIPTOR(viztrail, urls),
        'branch': BRANCH_DESCRIPTOR(viztrail, branch, urls),
        'readOnly': read_only
    }
    # Resource references: the module listing itself and the workflow
    modules_url = urls.workflow_modules_url(vt_id, branch_id, workflow.version)
    workflow_url = urls.workflow_url(vt_id, branch_id, workflow.version)
    header[JSON_REFERENCES] = [
        self_reference(modules_url),
        reference(hateoas.REL_WORKFLOW, workflow_url)
    ]
    # The module serializations are added by the add_modules helper, whose
    # result is returned as is.
    return add_modules(header, viztrail, workflow, config, urls, dataset_cache)
def WORKFLOW_DESCRIPTOR(viztrail, branch, version, created_at, urls):
    """Get dictionary representation for a workflow descriptor.

    Parameters
    ----------
    viztrail : vizier.workflow.base.ViztrailHandle
        Viztrail handle
    branch : vizier.workflow.base.ViztrailBranch
        Handle for the branch that contains the workflow
    version: int
        Workflow version identifier
    created_at: datetime.datetime
        Timestamp for workflow creation
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    project_id = viztrail.identifier
    branch_key = branch.identifier
    # The workflow version may be negative for the HEAD of an empty master
    # branch (i.e., a newly created viztrail). In this case the self and
    # append references point to the branch head instead of a versioned
    # workflow.
    if version >= 0:
        workflow_ref = urls.workflow_url(project_id, branch_key, version)
        append_ref = urls.workflow_append_url(project_id, branch_key, version)
    else:
        workflow_ref = urls.branch_head_url(project_id, branch_key)
        append_ref = urls.branch_head_append_url(project_id, branch_key)
    # NOTE(review): the modules url is built from the version even if it is
    # negative - confirm that the url factory handles this case.
    modules_ref = urls.workflow_modules_url(project_id, branch_key, version)
    links = [
        self_reference(workflow_ref),
        reference(hateoas.REL_BRANCH, urls.branch_url(project_id, branch_key)),
        reference(hateoas.REL_BRANCHES, urls.branches_url(project_id)),
        reference(hateoas.REL_APPEND, append_ref),
        reference(hateoas.REL_MODULES, modules_ref)
    ]
    descriptor = dict()
    descriptor['version'] = version
    descriptor['createdAt'] = created_at.isoformat()
    descriptor[JSON_REFERENCES] = links
    return descriptor
def PROJECT_DESCRIPTOR(viztrail, urls):
    """Serialize the fundamental metadata of a project as a dictionary.

    Parameters
    ----------
    viztrail : vizier.workflow.base.ViztrailHandle
        Viztrail handle
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    project_id = viztrail.identifier
    self_url = urls.project_url(project_id)
    # Project properties are serialized as a list of key-value pairs
    props = viztrail.properties.get_properties()
    property_list = list()
    for name in props:
        property_list.append({'key': name, 'value': props[name]})
    # The project url is used for the self reference and for deletion
    links = [
        self_reference(self_url),
        reference(hateoas.REL_DELETE, self_url),
        reference(hateoas.REL_SERVICE, urls.service_url()),
        reference(
            hateoas.REL_UPDATE,
            urls.update_project_properties_url(project_id)
        ),
        reference(hateoas.REL_BRANCHES, urls.branches_url(project_id)),
        reference(
            hateoas.REL_MODULE_SPECS,
            urls.project_module_specs_url(project_id)
        )
    ]
    return {
        'id': project_id,
        'environment': viztrail.env_id,
        'createdAt': viztrail.created_at.isoformat(),
        'lastModifiedAt': viztrail.last_modified_at.isoformat(),
        'properties': property_list,
        JSON_REFERENCES: links
    }
def SERVICE_BUILD(components, urls):
    """Serialize the service build information as a dictionary.

    Parameters
    ----------
    components: dict
        Dictionary serialization of individual components
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    # The components are passed through unchanged. Only the resource
    # references are added.
    links = [
        self_reference(urls.system_build_url()),
        reference(hateoas.REL_SERVICE, urls.service_url())
    ]
    build_info = dict()
    build_info['components'] = components
    build_info[JSON_REFERENCES] = links
    return build_info
def DATASET_CHART_VIEW(view, rows, self_ref):
    """Serialize the data of a chart view query result as a dictionary.

    Parameters
    ----------
    view: vizier.plot.view.ChartViewHandle
        Handle defining the dataset chart view
    rows: list()
        List of rows in the query result
    self_ref: string
        Self-reference url for chart view data.

    Returns
    -------
    dict
    """
    # Start from the serialization produced by CHART_VIEW_DATA and extend it
    # with the chart name and the self reference.
    chart_data = CHART_VIEW_DATA(rows=rows, view=view)
    chart_data['name'] = view.chart_name
    links = [self_reference(self_ref)]
    chart_data[JSON_REFERENCES] = links
    return chart_data
def PROJECT_MODULE_SPECIFICATIONS(viztrail, urls):
    """Serialize the module specifications of a project as a dictionary.

    Parameters
    ----------
    viztrail : vizier.workflow.base.ViztrailHandle
        Viztrail handle
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    # Project descriptor and the serialized command repository of the viztrail
    specs = dict()
    specs['project'] = PROJECT_DESCRIPTOR(viztrail, urls)
    specs['modules'] = COMMAND_REPOSITORY(viztrail.command_repository)
    # The self reference is the only resource reference for this object
    specs_url = urls.project_module_specs_url(viztrail.identifier)
    specs[JSON_REFERENCES] = [self_reference(specs_url)]
    return specs
def FILE_LISTING(files, urls):
    """Serialize a listing of file handles as a dictionary.

    Parameters
    ----------
    files: list(vizier.filestore.base.FileHandle)
        List of file handles
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    # Each file is serialized using the FILE_HANDLE serializer
    serialized_files = list()
    for f_handle in files:
        serialized_files.append(FILE_HANDLE(f_handle, urls))
    links = [
        self_reference(urls.files_url()),
        reference(hateoas.REL_UPLOAD, urls.files_upload_url())
    ]
    return {'files': serialized_files, JSON_REFERENCES: links}
def SERVICE_DESCRIPTOR(config, urls):
    """Serialize the service configuration and references as a dictionary.

    Parameters
    ----------
    config : vizier.config.AppConfig
        Application configuration parameters
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    descriptor = {'name': config.name}
    # The maximum file size of the file server is the only service property
    max_size_property = ObjectProperty(
        PROP_FS_MAXFILESIZE,
        config.fileserver.max_file_size
    )
    descriptor['properties'] = [max_size_property.to_dict()]
    # One entry for each environment in the configuration
    environments = list()
    for env_key in config.envs:
        env = config.envs[env_key]
        environments.append({
            'id': env.identifier,
            'name': env.name,
            'description': env.description,
            'default': env.default,
            'packages': env.packages
        })
    descriptor['envs'] = environments
    # References to the resources that are accessible from the service. The
    # API documentation url is taken from the configuration instead of the
    # url factory.
    descriptor[JSON_REFERENCES] = [
        self_reference(urls.service_url()),
        reference(hateoas.REL_SYSTEM_BUILD, urls.system_build_url()),
        reference(hateoas.REL_NOTEBOOKS, urls.notebooks_url()),
        reference(hateoas.REL_PROJECTS, urls.projects_url()),
        reference(hateoas.REL_FILES, urls.files_url()),
        reference(hateoas.REL_UPLOAD, urls.files_upload_url()),
        reference(hateoas.REL_APIDOC, config.api.doc_url)
    ]
    return descriptor
def PROJECT_LISTING(projects, urls):
    """Serialize a listing of projects as a dictionary.

    Parameters
    ----------
    projects: list(vizier.workflow.base.ViztrailHandle)
        List of viztrail descriptors
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    # Each project is serialized using the PROJECT_DESCRIPTOR serializer
    descriptors = list()
    for viztrail in projects:
        descriptors.append(PROJECT_DESCRIPTOR(viztrail, urls))
    # The self reference and the REL_CREATE reference both use the projects
    # url.
    links = [
        self_reference(urls.projects_url()),
        reference(hateoas.REL_CREATE, urls.projects_url()),
        reference(hateoas.REL_SERVICE, urls.service_url())
    ]
    listing = dict()
    listing['projects'] = descriptors
    listing[JSON_REFERENCES] = links
    return listing
def DATASET_ANNOTATIONS(dataset_id, annotations, column_id, row_id, urls):
    """Get dictionary serialization for annotations of a dataset component.

    Parameters
    ----------
    dataset_id : string
        Unique dataset identifier
    annotations: list(vizier.datastore.metadata.Annotation)
        Set of annotations for dataset components
    column_id: int
        Unique column identifier for component
    row_id: int
        Unique row identifier for component
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    # Each annotation is represented by its identifier, key, and value
    serialized = list()
    for anno in annotations:
        serialized.append({
            'id': anno.identifier,
            'key': anno.key,
            'value': anno.value
        })
    result = {'annotations': serialized}
    # The column and row identifier are only part of the result if they are
    # not negative.
    for label, identifier in (('column', column_id), ('row', row_id)):
        if identifier >= 0:
            result[label] = identifier
    # References to the annotations resource itself and to the dataset
    result[JSON_REFERENCES] = [
        self_reference(urls.dataset_annotations_url(dataset_id)),
        reference(hateoas.REL_DATASET, urls.dataset_url(dataset_id))
    ]
    return result
def BRANCH_DESCRIPTOR(viztrail, branch, urls):
    """Dictionary representation for a branch descriptor.

    Parameters
    ----------
    viztrail : vizier.workflow.base.ViztrailHandle
        Viztrail handle
    branch : vizier.workflow.base.ViztrailBranch
        Handle for the branch that is being serialized
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    project_id = viztrail.identifier
    branch_key = branch.identifier
    # Branch properties are serialized as a list of key-value pairs
    props = branch.properties.get_properties()
    property_list = list()
    for name in props:
        property_list.append({'key': name, 'value': props[name]})
    # The branch url is used for the self reference and for deletion
    branch_url = urls.branch_url(project_id, branch_key)
    links = [
        self_reference(branch_url),
        reference(hateoas.REL_DELETE, branch_url),
        reference(
            hateoas.REL_HEAD,
            urls.branch_head_url(project_id, branch_key)
        ),
        reference(hateoas.REL_PROJECT, urls.project_url(project_id)),
        reference(
            hateoas.REL_UPDATE,
            urls.branch_update_url(project_id, branch_key)
        )
    ]
    descriptor = dict()
    descriptor['id'] = branch_key
    descriptor['properties'] = property_list
    descriptor[JSON_REFERENCES] = links
    return descriptor
def DATASET_DESCRIPTOR(dataset, config, urls):
    """Create dictionary serialization for a dataset descriptor.

    Parameters
    ----------
    dataset : vizier.datastore.base.DatasetHandle
        Handle for dataset
    config : vizier.config.AppConfig
        Application configuration parameters
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    ds_id = dataset.identifier
    # The descriptor only contains the identifier and name of each column
    column_list = list()
    for column in dataset.columns:
        column_list.append({'id': column.identifier, 'name': column.name})
    descriptor = {
        'id': ds_id,
        'columns': column_list,
        'rows': dataset.row_count
    }
    # Resource references followed by the pagination references, which are
    # generated with the default arguments of DATASET_PAGINATION_URLS.
    links = [
        self_reference(urls.dataset_url(ds_id)),
        reference(
            hateoas.REL_ANNOTATED,
            urls.dataset_with_annotations_url(ds_id)
        ),
        reference(hateoas.REL_DOWNLOAD, urls.dataset_download_url(ds_id))
    ]
    descriptor[JSON_REFERENCES] = links + DATASET_PAGINATION_URLS(
        dataset,
        config,
        urls
    )
    return descriptor
def WORKFLOW_HANDLE(viztrail, workflow, config, urls, dataset_cache, read_only=False):
    """Dictionary representation for a workflow handle.

    Parameters
    ----------
    viztrail : vizier.workflow.base.ViztrailHandle
        Viztrail handle
    workflow : vizier.workflow.base.WorkflowHandle
        Workflow handle
    config : vizier.config.AppConfig
        Application configuration parameters
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls
    dataset_cache: func
        Function to get dataset handle for given identifier
    read_only: bool, optional
        Value for the read only flag in the workflow serialization

    Returns
    -------
    dict
    """
    branch = viztrail.branches[workflow.branch_id]
    version = workflow.version
    # Start from the workflow descriptor and add the descriptors for the
    # project and branch that the workflow belongs to.
    handle = WORKFLOW_DESCRIPTOR(
        viztrail,
        branch,
        version,
        workflow.created_at,
        urls
    )
    handle['project'] = PROJECT_DESCRIPTOR(viztrail, urls)
    handle['branch'] = BRANCH_DESCRIPTOR(viztrail, branch, urls)
    # Datasets and chart views in the current workflow state. Both lists
    # remain empty if the workflow is in an error state or has no modules.
    dataset_descriptors = list()
    chart_descriptors = list()
    has_state = not workflow.has_error and len(workflow.modules) > 0
    if has_state:
        # The datasets in the current state are those of the last module
        final_module = workflow.modules[-1]
        current_datasets = final_module.datasets
        for ds_name in current_datasets:
            ds_handle = dataset_cache(current_datasets[ds_name])
            descriptor = DATASET_DESCRIPTOR(ds_handle, config, urls)
            # Make sure to add the dataset name to the descriptor
            descriptor['name'] = ds_name
            dataset_descriptors.append(descriptor)
        # The chart definitions have to be extracted from the outputs of the
        # workflow modules. Modules are visited last to first so that for
        # each chart name the definition from the latest module is kept.
        latest_views = dict()
        for module in reversed(workflow.modules):
            for output in module.stdout:
                if output['type'] != O_CHARTVIEW:
                    continue
                view = ChartViewHandle.from_dict(output['data'])
                if view.chart_name not in latest_views:
                    latest_views[view.chart_name] = view
        # Only charts whose dataset exists in the current state are listed.
        # The view url is always relative to the last module.
        for chart_name in latest_views:
            view = latest_views[chart_name]
            if view.dataset_name not in current_datasets:
                continue
            view_url = urls.workflow_module_view_url(
                viztrail.identifier,
                branch.identifier,
                version,
                final_module.identifier,
                view.identifier
            )
            chart_descriptors.append({
                'id': chart_name,
                'name': chart_name,
                JSON_REFERENCES: [self_reference(view_url)]
            })
    handle['state'] = {
        'datasets': dataset_descriptors,
        'charts': chart_descriptors,
        'hasError': workflow.has_error,
        'moduleCount': len(workflow.modules)
    }
    handle['readOnly'] = read_only
    return handle
def MODULE_HANDLE(viztrail, branch, version, module, views, urls):
    """Get dictionary representation for a workflow module handle.

    Chart views that are defined in the module output are added to the given
    views dictionary, i.e., the dictionary is modified in place.

    Parameters
    ----------
    viztrail : vizier.workflow.base.ViztrailHandle
        Viztrail handle
    branch : vizier.workflow.base.ViztrailBranch
        Handle for the branch that contains the workflow
    version: int
        Workflow version identifier
    module : vizier.workflow.module.ModuleHandle
        Handle for workflow module
    views: dict(vizier.plot.view.ChartViewHandle)
        Dictionary of available views indexed by their name. Views from the
        module output are added to (or replace entries in) this dictionary.
    urls: vizier.hateoas.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    module_url = urls.workflow_module_url(
        viztrail.identifier,
        branch.identifier,
        version,
        module.identifier
    )
    # Convert chart views in the module output to dictionaries that contain
    # a self reference for data access. In the first step we replace the
    # data value with the view name
    stdout = list()
    view_outputs = list()
    for obj in module.stdout:
        if obj['type'] == O_CHARTVIEW:
            view = ChartViewHandle.from_dict(obj['data'])
            if view.dataset_name not in module.datasets:
                # Remove outputs that reference views accessing non-existent
                # datasets
                continue
            views[view.chart_name] = view
            # This is a bit tricky. Create a placeholder object and then
            # replace the data value with the serialized version of
            # the chart handle later on. Make sure to keep track of any
            # results that may be associated with the output
            placeholder = {'type': O_CHARTVIEW, 'data': view.chart_name}
            if 'result' in obj:
                placeholder['result'] = obj['result']
            obj = placeholder
            view_outputs.append(obj)
        stdout.append(obj)
    # Create a list of serialized view handles. Only views whose dataset is
    # available in the module state are included.
    view_handles = dict()
    for view in views.values():
        if view.dataset_name in module.datasets:
            view_url = urls.workflow_module_view_url(
                viztrail.identifier,
                branch.identifier,
                version,
                module.identifier,
                view.identifier
            )
            view_handles[view.chart_name] = {
                'name': view.chart_name,
                JSON_REFERENCES: [self_reference(view_url)]
            }
    # Replace data in view outputs. Every placeholder refers to a view that
    # was added to views above and whose dataset is in module.datasets. The
    # lookup therefore always finds a serialized handle.
    for obj in view_outputs:
        obj['data'] = view_handles[obj['data']]
    args = module.command.arguments
    return {
        'id': module.identifier,
        'command': {
            'type': module.command.module_type,
            'id': module.command.command_identifier,
            'arguments': [{'name': key, 'value': args[key]} for key in args]
        },
        'text': module.command_text,
        'stdout': stdout,
        'stderr': module.stderr,
        'datasets': [
            {'id': module.datasets[d], 'name': d}
            for d in sorted(module.datasets.keys())
        ],
        # Materialize the values as a list. Under Python 3 dict.values()
        # returns a view object that is not a list and that cannot be
        # serialized by the json module.
        'views': list(view_handles.values()),
        JSON_REFERENCES: [
            reference(hateoas.REL_DELETE, module_url),
            reference(hateoas.REL_INSERT, module_url),
            reference(hateoas.REL_REPLACE, module_url)
        ]
    }