def create_plot(ds_name, chart_name, series):
    """Build the module specification for a simple chart cell.

    The chart settings are fixed by this function: the chart type is set to
    'bar' and grouping is disabled.

    Parameters
    ----------
    ds_name: string
        Dataset name
    chart_name: string
        Name of the chart
    series: list()
        Specification of data series

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    chart_settings = {PARA_CHART_TYPE: 'bar', PARA_CHART_GROUPED: False}
    arguments = {
        PARA_DATASET: ds_name,
        PARA_NAME: chart_name,
        PARA_SERIES: series,
        PARA_CHART: chart_settings
    }
    return ModuleSpecification(PACKAGE_PLOT, PLOT_SIMPLE_CHART, arguments)
def mimir_missing_key(
    dataset_name, column, missing_only=None, make_input_certain=False
):
    """Build the module specification for a Mimir Missing Key Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column
    missing_only: boolean, optional
        Optional MISSING_ONLY parameter. NOTE: the value is accepted but is
        currently not added to the arguments of the returned specification.
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column,
        PARA_MAKE_CERTAIN: make_input_certain
    }
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_MISSING_KEY, arguments)
def mimir_missing_value(dataset_name, columns, make_input_certain=False):
    """Build the module specification for a Mimir Missing Value Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    columns: any
        Value that is passed on unchanged as the columns argument of the lens
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    return ModuleSpecification(
        PACKAGE_MIMIR,
        MIMIR_MISSING_VALUE,
        {
            PARA_DATASET: dataset_name,
            PARA_COLUMNS: columns,
            PARA_MAKE_CERTAIN: make_input_certain
        }
    )
def insert_module(project_id, branch_id, version, module_id):
    """Insert a module into a workflow branch before the specified module and
    execute the resulting workflow.

    Request
    -------
    {
      "type": "string",
      "id": "string",
      "arguments": {}
    }
    """
    # The request body has to provide the three elements that make up a
    # module specification.
    cmd = validate_json_request(request, required=['type', 'id', 'arguments'])
    try:
        # A ValueError raised anywhere in this section is reported to the
        # client as an invalid request.
        command = ModuleSpecification(cmd['type'], cmd['id'], cmd['arguments'])
        wf = api.append_module(
            project_id,
            branch_id,
            version,
            command,
            before_id=module_id
        )
        # The API signals an unknown resource by returning None.
        if wf is None:
            target = project_id + ':' + branch_id + ':' + str(version)
            target = target + '[' + str(module_id) + ']'
            raise ResourceNotFound('unknown workflow module \'' + target + '\'')
        return jsonify(wf)
    except ValueError as ex:
        raise InvalidRequest(str(ex))
def update_cell(dataset_name, column, row, value):
    """Build the module specification for updating a dataset cell value.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Cell column name or index
    row: int
        Cell row index
    value: string
        New cell value

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column,
        PARA_ROW: row,
        'value': value
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_UPD_CELL, arguments)
def mimir_schema_matching(
    dataset_name, schema, result_name, make_input_certain=False
):
    """Build the module specification for a Mimir Schema Matching Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    schema: list(dict)
        List of objects containing 'column' and 'type' elements
    result_name: string
        Value that is passed on as the result dataset argument
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_SCHEMA: schema,
        PARA_RESULT_DATASET: result_name,
        PARA_MAKE_CERTAIN: make_input_certain
    }
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_SCHEMA_MATCHING, arguments)
def append_branch_head(project_id, branch_id):
    """Append a module to the workflow that is at the HEAD of the given branch.

    Request
    -------
    {
      "type": "string",
      "id": "string",
      "arguments": {}
    }
    """
    # The request body has to provide the three elements that make up a
    # module specification.
    cmd = validate_json_request(request, required=['type', 'id', 'arguments'])
    try:
        # A ValueError raised anywhere in this section is reported to the
        # client as an invalid request.
        command = ModuleSpecification(cmd['type'], cmd['id'], cmd['arguments'])
        # -1 is passed for both the workflow version and the before_id;
        # presumably this selects the branch head and appends at the end.
        wf = api.append_module(
            project_id,
            branch_id,
            -1,
            command,
            before_id=-1
        )
        # The API signals an unknown resource by returning None.
        if wf is None:
            target = project_id + ':' + branch_id
            raise ResourceNotFound('unknown workflow \'' + target + ':head\'')
        return jsonify(wf)
    except ValueError as ex:
        raise InvalidRequest(str(ex))
def reload_projects(self):
    """Re-execute all LOAD commands in the workflows of all projects.

    In a first pass the workflow of every branch of every project is scanned
    for modules whose command identifier is "LOAD". In a second pass each of
    these modules is replaced by a module with the same command type,
    identifier, and arguments via replace_module().

    Returns
    -------
    dict
        Dictionary with two elements. 'failed_reloads' lists the descriptors
        of all commands for which the replacement raised an exception; each
        descriptor is extended with an 'error' element that contains the
        formatted traceback. 'successful_reloads' lists the results that were
        returned by replace_module().
    """
    # Collect all load commands before replacing any module so that the
    # workflows are not modified while they are being scanned.
    load_commands = []
    for project in self.viztrails.list_viztrails():
        branches = self.list_branches(project.identifier)
        for branch in branches['branches']:
            wf = self.get_workflow_modules(project.identifier, branch['id'])
            for module in wf['modules']:
                command = module['command']
                if command['id'] != "LOAD":
                    continue
                # The workflow lists arguments as name/value pairs. Convert
                # them into the dictionary that is passed to
                # ModuleSpecification below.
                arguments = {
                    arg['name']: arg['value'] for arg in command['arguments']
                }
                load_commands.append({
                    'project': project.identifier,
                    'branch': branch['id'],
                    'workflow': wf['version'],
                    'module': module['id'],
                    'command': {
                        'id': command['id'],
                        'type': command['type'],
                        'arguments': arguments
                    }
                })
    failed_reloads = []
    successful_reloads = []
    for entry in load_commands:
        try:
            cmd = entry['command']
            successful_reloads.append(
                self.replace_module(
                    entry['project'],
                    entry['branch'],
                    entry['workflow'],
                    entry['module'],
                    ModuleSpecification(
                        cmd['type'],
                        cmd['id'],
                        cmd['arguments']
                    )
                )
            )
        except Exception:
            # Record the failure and continue with the remaining commands.
            # Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still terminate the reload.
            entry['error'] = traceback.format_exc()
            failed_reloads.append(entry)
    return {
        'failed_reloads': failed_reloads,
        'successful_reloads': successful_reloads
    }
def test_validate_vizual(self):
    """Test validation of VizUAL cell command specifications."""
    repo = self.command_repository

    def expect_invalid(spec):
        # Validation of the given specification has to raise a ValueError.
        with self.assertRaises(ValueError):
            cmd.validate_command(repo, spec)

    # DELETE COLUMN
    spec = cmd.delete_column('dataset', 'column')
    cmd.validate_command(repo, spec)
    # Adding a 'row' argument is expected to be rejected.
    spec.arguments['row'] = 'row'
    expect_invalid(spec)
    # Removing the 'dataset' argument is expected to be rejected.
    spec = cmd.delete_column('dataset', 'column')
    del spec.arguments['dataset']
    expect_invalid(spec)
    # Same check as above, this time on a specification that has not been
    # validated before.
    spec = cmd.delete_column('dataset', 'column')
    spec.arguments['row'] = 'row'
    expect_invalid(spec)
    # The specifications that are created by the remaining helper functions
    # are all expected to pass validation.
    valid_specs = [
        # DELETE ROW
        cmd.delete_row('dataset', 'row'),
        # INSERT COLUMN
        cmd.insert_column('dataset', 1, 'A'),
        # INSERT ROW
        cmd.insert_row('dataset', 1),
        # LOAD DATASET
        cmd.load_dataset('file', 'dataset', filename='My File'),
        # MOVE COLUMN
        cmd.move_column('dataset', 'A', 2),
        # MOVE ROW
        cmd.move_row('dataset', 1, 2),
        # RENAME COLUMN
        cmd.rename_column('dataset', 'A', 'B'),
        # UPDATE CELL
        cmd.update_cell('dataset', 'A', 1, 'X')
    ]
    for spec in valid_specs:
        cmd.validate_command(repo, spec)
    # Unknown VizUAL Command
    unknown_args = {
        'name': 'unknown',
        'arguments': {
            'dataset': '1',
            'name': '2',
            'position': '3'
        }
    }
    expect_invalid(
        ModuleSpecification(cmd.PACKAGE_VIZUAL, 'unknown', unknown_args)
    )
def test_validate_python(self):
    """Test validation of python cell command specifications."""
    repo = self.command_repository
    # The specification created by the helper function is expected to pass.
    cmd.validate_command(repo, cmd.python_cell('print 2'))
    # A specification that only has a 'content' argument is expected to be
    # rejected.
    content_only = ModuleSpecification(
        cmd.PACKAGE_PYTHON,
        cmd.PYTHON_CODE,
        {'content': 'abc'}
    )
    with self.assertRaises(ValueError):
        cmd.validate_command(repo, content_only)
    # Adding a 'content' argument to an otherwise valid specification is
    # expected to be rejected as well.
    extended = cmd.python_cell('print 2')
    extended.arguments['content'] = 'abc'
    with self.assertRaises(ValueError):
        cmd.validate_command(repo, extended)
def python_cell(source):
    """Build the module specification for a Python cell.

    Parameters
    ----------
    source: string
        Python code for cell body

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {PYTHON_SOURCE: source}
    return ModuleSpecification(PACKAGE_PYTHON, PYTHON_CODE, arguments)
def drop_dataset(dataset_name):
    """Build the module specification for dropping a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {PARA_DATASET: dataset_name}
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_DROP_DS, arguments)
def sql_cell(output_ds_name, source):
    """Build the module specification for a SQL cell.

    Parameters
    ----------
    output_ds_name: string
        Value that is passed on as the output dataset argument
    source: string
        SQL code for cell body

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_OUTPUT_DATASET: output_ds_name,
        SQL_SOURCE: source
    }
    return ModuleSpecification(PACKAGE_SQL, SQL_CODE, arguments)
def delete_row(dataset_name, row):
    """Build the module specification for deleting a dataset row.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    row: int
        Index for row that is being deleted

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_ROW: row
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_DEL_ROW, arguments)
def delete_column(dataset_name, column):
    """Build the module specification for deleting a dataset column.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column that is being deleted

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_DEL_COL, arguments)
def rename_dataset(dataset_name, new_name):
    """Build the module specification for renaming a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    new_name: string
        New dataset name

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_NAME: new_name
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_REN_DS, arguments)
def insert_row(dataset_name, position):
    """Build the module specification for inserting a row into a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    position: int
        Index position where row is inserted

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_POSITION: position
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_INS_ROW, arguments)
def move_column(dataset_name, column, position):
    """Build the module specification for moving a column in a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column that is being moved
    position: int
        Index position where column is moved to

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column,
        PARA_POSITION: position
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_MOV_COL, arguments)
def insert_column(dataset_name, position, name):
    """Build the module specification for inserting a column into a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    position: int
        Index position where column is inserted
    name: string
        New column name

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_POSITION: position,
        PARA_NAME: name
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_INS_COL, arguments)
def rename_column(dataset_name, column, name):
    """Build the module specification for renaming a dataset column.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column that is being renamed
    name: string
        New column name

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column,
        PARA_NAME: name
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_REN_COL, arguments)
def move_row(dataset_name, row, position):
    """Build the module specification for moving a row in a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    row: int
        Index of row that is being moved
    position: int
        Index position where row is moved

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_ROW: row,
        PARA_POSITION: position
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_MOV_ROW, arguments)
def load_dataset(
    file_id, dataset_name, filename=None, url=None, infer_types=False,
    detect_headers=False, load_format='csv', load_options=None
):
    """Build the module specification for loading a dataset from file.
    Expects file identifier and new dataset name.

    Parameters
    ----------
    file_id: string
        Unique file identifier
    dataset_name: string
        Name for the new dataset
    filename: string, optional
        Optional name of the source file
    url: string, optional
        Optional Url of the source file
    infer_types: bool, optional
        Value that is passed on unchanged as the PARA_LOAD_TI argument
    detect_headers: bool, optional
        Value that is passed on unchanged as the PARA_LOAD_DH argument
    load_format: string, optional
        Value that is passed on unchanged as the load format argument
    load_options: any, optional
        Value that is passed on unchanged as the load options argument

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    # The file reference always contains the file identifier. File name and
    # Url are only included if they are given.
    source_file = {'fileid': file_id}
    if filename is not None:
        source_file['filename'] = filename
    if url is not None:
        source_file['url'] = url
    arguments = {
        PARA_FILE: source_file,
        PARA_NAME: dataset_name,
        PARA_LOAD_TI: infer_types,
        PARA_LOAD_DH: detect_headers,
        PARA_LOAD_FORMAT: load_format,
        PARA_LOAD_OPTIONS: load_options
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_LOAD, arguments)
def mimir_key_repair(dataset_name, column, make_input_certain=False):
    """Build the module specification for a Mimir Key Repair Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column,
        PARA_MAKE_CERTAIN: make_input_certain
    }
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_KEY_REPAIR, arguments)
def replace_module(project_id, branch_id, version, module_id):
    """Replace a module in the current project workflow branch and execute the
    resulting workflow.

    Request
    -------
    {
      "type": "string",
      "id": "string",
      "arguments": {}
    }
    """
    # The request body has to provide the three elements that make up a
    # module specification. The 'includeDataset' element is optional.
    cmd = validate_json_request(
        request,
        required=['type', 'id', 'arguments'],
        optional=['includeDataset']
    )
    # The optional include dataset argument is used to include the updated
    # dataset in the response. It is None if not present in the request.
    include_dataset = cmd['includeDataset'] if 'includeDataset' in cmd else None
    try:
        # A ValueError raised anywhere in this section is reported to the
        # client as an invalid request.
        command = ModuleSpecification(cmd['type'], cmd['id'], cmd['arguments'])
        wf = api.replace_module(
            project_id,
            branch_id,
            version,
            module_id,
            command,
            includeDataset=include_dataset
        )
        # The API signals an unknown resource by returning None.
        if wf is None:
            target = project_id + ':' + branch_id + ':' + str(module_id)
            raise ResourceNotFound('unknown workflow module \'' + target + '\'')
        return jsonify(wf)
    except ValueError as ex:
        raise InvalidRequest(str(ex))
def mimir_type_inference(
    dataset_name, percent_conform, make_input_certain=False
):
    """Build the module specification for a Mimir Type Inference Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    percent_conform: float
        Percent that conforms
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_PERCENT_CONFORM: percent_conform,
        PARA_MAKE_CERTAIN: make_input_certain
    }
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_TYPE_INFERENCE, arguments)
def mimir_geocode(
    dataset_name, geocoder, house_nr=None, street=None, city=None,
    state=None, make_input_certain=False
):
    """Build the module specification for a Mimir Geocode Lens.

    The four address related arguments are optional. Each of them is only
    included in the specification if it is not None.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    geocoder: any
        Value that is passed on unchanged as the geocoder argument
    house_nr: any, optional
        Value for the house number argument
    street: any, optional
        Value for the street argument
    city: any, optional
        Value for the city argument
    state: any, optional
        Value for the state argument
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_GEOCODER: geocoder,
        PARA_MAKE_CERTAIN: make_input_certain
    }
    optional_arguments = (
        (PARA_HOUSE_NUMBER, house_nr),
        (PARA_STREET, street),
        (PARA_CITY, city),
        (PARA_STATE, state)
    )
    for key, value in optional_arguments:
        if value is not None:
            arguments[key] = value
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_GEOCODE, arguments)
def mimir_picker(dataset_name, schema, pick_as=None, make_input_certain=False):
    """Build the module specification for a Mimir Picker Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    schema: list(dict)
        List of objects containing 'pickFrom' and 'pickAs' elements
    pick_as: any, optional
        Value for the pick-as argument. Only included in the specification if
        it is not None.
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_SCHEMA: schema,
        PARA_MAKE_CERTAIN: make_input_certain
    }
    if pick_as is not None:
        arguments[PARA_PICKAS] = pick_as
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_PICKER, arguments)
def test_validate_plot(self):
    """Test validation of plot cell command specifications."""
    # Two data series: the first one has a label and a range, the second one
    # only names the column.
    series = [
        {
            u'series_column': u'A',
            u'series_label': 'Fatal',
            u'series_range': '0:20'
        },
        {
            u'series_column': u'B'
        }
    ]
    chart = {
        u'chartType': u'Bar Chart',
        u'chartGrouped': True
    }
    arguments = {
        u'series': series,
        u'chart': chart,
        u'name': u'My Chart',
        u'xaxis': {
            u'xaxis_column': u'Year'
        },
        u'dataset': u'accidents'
    }
    plot = ModuleSpecification('plot', 'CHART', arguments)
    # The specification is expected to pass validation without an exception.
    cmd.validate_command(self.command_repository, plot)