Пример #1
0
def create_plot(ds_name, chart_name, series):
    """Build the module specification for a simple chart cell.

    The chart settings are fixed by this helper: the chart type is always
    'bar' and grouping is always disabled.

    Parameters
    ----------
    ds_name: string
        Name of the dataset that is being plotted
    chart_name: string
        Name of the resulting chart
    series: list()
        Specification of the data series that are shown in the chart

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    chart_settings = {PARA_CHART_TYPE: 'bar', PARA_CHART_GROUPED: False}
    arguments = {
        PARA_DATASET: ds_name,
        PARA_NAME: chart_name,
        PARA_SERIES: series,
        PARA_CHART: chart_settings
    }
    return ModuleSpecification(PACKAGE_PLOT, PLOT_SIMPLE_CHART, arguments)
Пример #2
0
def mimir_missing_key(dataset_name,
                      column,
                      missing_only=None,
                      make_input_certain=False):
    """Build the module specification for a Mimir Missing Key Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column
    missing_only: boolean, optional
        Accepted as part of the signature but not included in the arguments
        of the returned specification
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    lens_args = {}
    lens_args[PARA_DATASET] = dataset_name
    lens_args[PARA_COLUMN] = column
    lens_args[PARA_MAKE_CERTAIN] = make_input_certain
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_MISSING_KEY, lens_args)
Пример #3
0
def mimir_missing_value(dataset_name, columns, make_input_certain=False):
    """Build the module specification for a Mimir Missing Value Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    columns: list
        Value for the columns argument of the lens. Presumably a list of
        column specifications -- TODO confirm the expected structure.
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    return ModuleSpecification(
        PACKAGE_MIMIR, MIMIR_MISSING_VALUE, {
            PARA_DATASET: dataset_name,
            PARA_COLUMNS: columns,
            PARA_MAKE_CERTAIN: make_input_certain
        })
Пример #4
0
def insert_module(project_id, branch_id, version, module_id):
    """Insert a new module in front of an existing module of a workflow
    branch and execute the resulting workflow.

    The new module is described by the Json body of the current request.
    Raises InvalidRequest if a ValueError occurs while the workflow is
    extended and ResourceNotFound if the API returns no workflow.

    Request
    -------
    {
      "type": "string",
      "id": "string",
      "arguments": {}
    }
    """
    # The request body has to be a Json object with the command type,
    # identifier, and arguments. The validation helper is expected to abort
    # with BAD REQUEST otherwise.
    body = validate_json_request(request, required=['type', 'id', 'arguments'])
    try:
        # A ValueError is raised if the command cannot be parsed.
        command = ModuleSpecification(body['type'], body['id'],
                                      body['arguments'])
        workflow = api.append_module(project_id,
                                     branch_id,
                                     version,
                                     command,
                                     before_id=module_id)
        # The API signals an unknown project or workflow version by
        # returning None.
        if workflow is None:
            message = ('unknown workflow module \'' + project_id + ':' +
                       branch_id + ':' + str(version) + '[' +
                       str(module_id) + ']\'')
            raise ResourceNotFound(message)
        return jsonify(workflow)
    except ValueError as ex:
        raise InvalidRequest(str(ex))
0
def update_cell(dataset_name, column, row, value):
    """Build the module specification that updates a single dataset cell.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index of the cell column
    row: int
        Index of the cell row
    value: string
        New cell value

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    cell_update = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column,
        PARA_ROW: row,
        'value': value
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_UPD_CELL, cell_update)
Пример #6
0
def mimir_schema_matching(dataset_name,
                          schema,
                          result_name,
                          make_input_certain=False):
    """Build the module specification for a Mimir Schema Matching Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    schema: list(dict)
        List of objects containing 'column' and 'type' elements
    result_name: string
        Value for the result dataset argument of the lens
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    lens_args = {}
    lens_args[PARA_DATASET] = dataset_name
    lens_args[PARA_SCHEMA] = schema
    lens_args[PARA_RESULT_DATASET] = result_name
    lens_args[PARA_MAKE_CERTAIN] = make_input_certain
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_SCHEMA_MATCHING,
                               lens_args)
Пример #7
0
def append_branch_head(project_id, branch_id):
    """Append a module at the end of the workflow that is at the HEAD of the
    given branch.

    The new module is described by the Json body of the current request.
    Raises InvalidRequest if a ValueError occurs while the workflow is
    extended and ResourceNotFound if the API returns no workflow.

    Request
    -------
    {
      "type": "string",
      "id": "string",
      "arguments": {}
    }
    """
    # The request body has to be a Json object with the command type,
    # identifier, and arguments. The validation helper is expected to abort
    # with BAD REQUEST otherwise.
    body = validate_json_request(request, required=['type', 'id', 'arguments'])
    try:
        # A ValueError is raised if the command cannot be parsed.
        command = ModuleSpecification(body['type'], body['id'],
                                      body['arguments'])
        # The value -1 is passed for both the workflow version and the
        # before_id argument.
        workflow = api.append_module(project_id,
                                     branch_id,
                                     -1,
                                     command,
                                     before_id=-1)
        # The API signals an unknown project or workflow by returning None.
        if workflow is None:
            message = ('unknown workflow \'' + project_id + ':' +
                       branch_id + ':head\'')
            raise ResourceNotFound(message)
        return jsonify(workflow)
    except ValueError as ex:
        raise InvalidRequest(str(ex))
Пример #8
0
    def reload_projects(self):
        """Re-execute every LOAD command that is found in the workflows of
        the projects in the repository.

        All branches of all projects are scanned for modules whose command
        identifier is "LOAD". Each of these modules is then replaced by a
        module specification that is rebuilt from the stored command. A
        failing replacement is recorded and does not abort the remaining
        reloads.

        Returns
        -------
        dict
            Dictionary with two lists. The list 'failed_reloads' contains the
            descriptor of every load command whose replacement raised an
            exception, extended with the formatted traceback under 'error'.
            The list 'successful_reloads' contains the results returned by
            replace_module for all other load commands.
        """
        # Phase 1: Collect a descriptor for every LOAD module. Collection is
        # finished before any module is replaced (presumably so that the
        # replacements cannot affect the workflows that are being scanned).
        load_commands = []
        for project in self.viztrails.list_viztrails():
            branches = self.list_branches(project.identifier)
            for branch in branches['branches']:
                wf = self.get_workflow_modules(project.identifier,
                                               branch['id'])
                for module in wf['modules']:
                    command = module['command']
                    if command['id'] != "LOAD":
                        continue
                    # The stored command lists its arguments as name/value
                    # objects. Convert them into a dictionary before they
                    # are passed to the module specification.
                    arguments = {
                        arg['name']: arg['value']
                        for arg in command['arguments']
                    }
                    load_commands.append({
                        'project': project.identifier,
                        'branch': branch['id'],
                        'workflow': wf['version'],
                        'module': module['id'],
                        'command': {
                            'id': command['id'],
                            'type': command['type'],
                            'arguments': arguments
                        }
                    })
        # Phase 2: Replace each collected module and keep track of the
        # outcome.
        failed_reloads = []
        successful_reloads = []
        for load_command in load_commands:
            cmd = load_command['command']
            try:
                result = self.replace_module(
                    load_command['project'], load_command['branch'],
                    load_command['workflow'], load_command['module'],
                    ModuleSpecification(cmd['type'], cmd['id'],
                                        cmd['arguments']))
            except Exception:
                # Catch Exception instead of using a bare except so that
                # KeyboardInterrupt and SystemExit are not swallowed.
                load_command['error'] = traceback.format_exc()
                failed_reloads.append(load_command)
            else:
                successful_reloads.append(result)
        return {
            'failed_reloads': failed_reloads,
            'successful_reloads': successful_reloads
        }
Пример #9
0
 def test_validate_vizual(self):
     """Test validation of VizUAL cell command specifications."""
     repo = self.command_repository

     def expect_valid(spec):
         # Validation is expected to accept the specification.
         cmd.validate_command(repo, spec)

     def expect_invalid(spec):
         # Validation is expected to reject the specification.
         with self.assertRaises(ValueError):
             cmd.validate_command(repo, spec)

     # DELETE COLUMN: accepted as created, rejected once an extra 'row'
     # argument is added or the 'dataset' argument is removed.
     spec = cmd.delete_column('dataset', 'column')
     expect_valid(spec)
     spec.arguments['row'] = 'row'
     expect_invalid(spec)
     spec = cmd.delete_column('dataset', 'column')
     del spec.arguments['dataset']
     expect_invalid(spec)
     spec = cmd.delete_column('dataset', 'column')
     spec.arguments['row'] = 'row'
     expect_invalid(spec)
     # DELETE ROW
     expect_valid(cmd.delete_row('dataset', 'row'))
     # INSERT COLUMN
     expect_valid(cmd.insert_column('dataset', 1, 'A'))
     # INSERT ROW
     expect_valid(cmd.insert_row('dataset', 1))
     # LOAD DATASET
     expect_valid(cmd.load_dataset('file', 'dataset', filename='My File'))
     # MOVE COLUMN
     expect_valid(cmd.move_column('dataset', 'A', 2))
     # MOVE ROW
     expect_valid(cmd.move_row('dataset', 1, 2))
     # RENAME COLUMN
     expect_valid(cmd.rename_column('dataset', 'A', 'B'))
     # UPDATE CELL
     expect_valid(cmd.update_cell('dataset', 'A', 1, 'X'))
     # Unknown VizUAL Command
     unknown_arguments = {
         'name': 'unknown',
         'arguments': {
             'dataset': '1',
             'name': '2',
             'position': '3'
         }
     }
     expect_invalid(
         ModuleSpecification(cmd.PACKAGE_VIZUAL, 'unknown',
                             unknown_arguments))
Пример #10
0
 def test_validate_python(self):
     """Test validation of python cell command specifications."""
     repo = self.command_repository
     # A specification that is created by the python_cell helper is
     # expected to pass validation.
     cmd.validate_command(repo, cmd.python_cell('print 2'))
     # A specification that only has a 'content' argument is expected to be
     # rejected.
     content_only = ModuleSpecification(cmd.PACKAGE_PYTHON, cmd.PYTHON_CODE,
                                        {'content': 'abc'})
     with self.assertRaises(ValueError):
         cmd.validate_command(repo, content_only)
     # Adding a 'content' argument to an otherwise valid specification is
     # expected to be rejected as well.
     extended = cmd.python_cell('print 2')
     extended.arguments['content'] = 'abc'
     with self.assertRaises(ValueError):
         cmd.validate_command(repo, extended)
Пример #11
0
def python_cell(source):
    """Build the module specification for a Python cell.

    Parameters
    ----------
    source: string
        Python code that forms the cell body

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    cell_args = {PYTHON_SOURCE: source}
    return ModuleSpecification(PACKAGE_PYTHON, PYTHON_CODE, cell_args)
Пример #12
0
def drop_dataset(dataset_name):
    """Build the module specification that drops a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset that is being dropped

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    drop_args = {PARA_DATASET: dataset_name}
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_DROP_DS, drop_args)
Пример #13
0
def sql_cell(output_ds_name, source):
    """Build the module specification for a SQL cell.

    Parameters
    ----------
    output_ds_name: string
        Value for the output dataset argument of the cell
    source: string
        SQL code that forms the cell body

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    cell_args = {}
    cell_args[PARA_OUTPUT_DATASET] = output_ds_name
    cell_args[SQL_SOURCE] = source
    return ModuleSpecification(PACKAGE_SQL, SQL_CODE, cell_args)
Пример #14
0
def delete_row(dataset_name, row):
    """Build the module specification that deletes a dataset row.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    row: int
        Index of the row that is being deleted

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {PARA_DATASET: dataset_name, PARA_ROW: row}
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_DEL_ROW, arguments)
Пример #15
0
def delete_column(dataset_name, column):
    """Build the module specification that deletes a dataset column.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index of the column that is being deleted

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {}
    arguments[PARA_DATASET] = dataset_name
    arguments[PARA_COLUMN] = column
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_DEL_COL, arguments)
Пример #16
0
def rename_dataset(dataset_name, new_name):
    """Build the module specification that renames a dataset.

    Parameters
    ----------
    dataset_name: string
        Current name of the dataset
    new_name: string
        New name for the dataset

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {PARA_DATASET: dataset_name, PARA_NAME: new_name}
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_REN_DS, arguments)
Пример #17
0
def insert_row(dataset_name, position):
    """Build the module specification that inserts a row into a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    position: int
        Index position at which the row is inserted

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {}
    arguments[PARA_DATASET] = dataset_name
    arguments[PARA_POSITION] = position
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_INS_ROW, arguments)
Пример #18
0
def move_column(dataset_name, column, position):
    """Build the module specification that moves a column within a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index of the column that is being moved
    position: int
        Index position that the column is moved to

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column,
        PARA_POSITION: position
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_MOV_COL, arguments)
Пример #19
0
def insert_column(dataset_name, position, name):
    """Build the module specification that inserts a column into a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    position: int
        Index position at which the column is inserted
    name: string
        Name of the new column

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {}
    arguments[PARA_DATASET] = dataset_name
    arguments[PARA_POSITION] = position
    arguments[PARA_NAME] = name
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_INS_COL, arguments)
Пример #20
0
def rename_column(dataset_name, column, name):
    """Build the module specification that renames a dataset column.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index of the column that is being renamed
    name: string
        New name for the column

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column,
        PARA_NAME: name
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_REN_COL, arguments)
Пример #21
0
def move_row(dataset_name, row, position):
    """Build the module specification that moves a row within a dataset.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    row: int
        Index of the row that is being moved
    position: int
        Index position that the row is moved to

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    arguments = {}
    arguments[PARA_DATASET] = dataset_name
    arguments[PARA_ROW] = row
    arguments[PARA_POSITION] = position
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_MOV_ROW, arguments)
Пример #22
0
def load_dataset(file_id,
                 dataset_name,
                 filename=None,
                 url=None,
                 infer_types=False,
                 detect_headers=False,
                 load_format='csv',
                 load_options=None):
    """Build the module specification that loads a dataset from a file.
    Expects the file identifier and the name for the new dataset.

    Parameters
    ----------
    file_id: string
        Unique file identifier
    dataset_name: string
        Name for the new dataset
    filename: string, optional
        Optional name of the source file. Only included in the file
        descriptor if not None.
    url: string, optional
        Optional Url of the source file. Only included in the file
        descriptor if not None.
    infer_types: bool, optional
        Value for the PARA_LOAD_TI argument of the load command
    detect_headers: bool, optional
        Value for the PARA_LOAD_DH argument of the load command
    load_format: string, optional
        Value for the load format argument (default is 'csv')
    load_options: optional
        Value for the load options argument. Passed through unchanged.

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    # The file descriptor always contains the file identifier. File name and
    # Url are added only if they are given.
    source_file = {'fileid': file_id}
    for key, value in (('filename', filename), ('url', url)):
        if value is not None:
            source_file[key] = value
    arguments = {
        PARA_FILE: source_file,
        PARA_NAME: dataset_name,
        PARA_LOAD_TI: infer_types,
        PARA_LOAD_DH: detect_headers,
        PARA_LOAD_FORMAT: load_format,
        PARA_LOAD_OPTIONS: load_options
    }
    return ModuleSpecification(PACKAGE_VIZUAL, VIZUAL_LOAD, arguments)
Пример #23
0
def mimir_key_repair(dataset_name, column, make_input_certain=False):
    """Build the module specification for a Mimir Key Repair Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    lens_args = {
        PARA_DATASET: dataset_name,
        PARA_COLUMN: column,
        PARA_MAKE_CERTAIN: make_input_certain
    }
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_KEY_REPAIR, lens_args)
Пример #24
0
def replace_module(project_id, branch_id, version, module_id):
    """Replace an existing module of a project workflow branch and execute
    the resulting workflow.

    The replacement is described by the Json body of the current request.
    Raises InvalidRequest if a ValueError occurs while the workflow is
    modified and ResourceNotFound if the API returns no workflow.

    Request
    -------
    {
      "type": "string",
      "id": "string",
      "arguments": {}
    }
    """
    # The request body has to be a Json object with the command type,
    # identifier, and arguments. The validation helper is expected to abort
    # with BAD REQUEST otherwise.
    body = validate_json_request(request,
                                 required=['type', 'id', 'arguments'],
                                 optional=['includeDataset'])
    # The optional includeDataset element is passed on to the API. It is
    # used to include the updated dataset in the response.
    include_dataset = (body['includeDataset']
                       if 'includeDataset' in body else None)
    try:
        # A ValueError is raised if the command cannot be parsed.
        command = ModuleSpecification(body['type'], body['id'],
                                      body['arguments'])
        workflow = api.replace_module(project_id,
                                      branch_id,
                                      version,
                                      module_id,
                                      command,
                                      includeDataset=include_dataset)
        # The API signals an unknown project or workflow version by
        # returning None.
        if workflow is None:
            message = ('unknown workflow module \'' + project_id + ':' +
                       branch_id + ':' + str(module_id) + '\'')
            raise ResourceNotFound(message)
        return jsonify(workflow)
    except ValueError as ex:
        raise InvalidRequest(str(ex))
Пример #25
0
def mimir_type_inference(dataset_name,
                         percent_conform,
                         make_input_certain=False):
    """Build the module specification for a Mimir Type Inference Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    percent_conform: float
        Percent that conforms
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    lens_args = {}
    lens_args[PARA_DATASET] = dataset_name
    lens_args[PARA_PERCENT_CONFORM] = percent_conform
    lens_args[PARA_MAKE_CERTAIN] = make_input_certain
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_TYPE_INFERENCE,
                               lens_args)
Пример #26
0
def mimir_geocode(dataset_name,
                  geocoder,
                  house_nr=None,
                  street=None,
                  city=None,
                  state=None,
                  make_input_certain=False):
    """Build the module specification for a Mimir Geocode Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    geocoder: string
        Value for the geocoder argument of the lens
    house_nr: optional
        Value for the house number argument. Only included if not None.
        Presumably a column name or index -- TODO confirm.
    street: optional
        Value for the street argument. Only included if not None.
    city: optional
        Value for the city argument. Only included if not None.
    state: optional
        Value for the state argument. Only included if not None.
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    lens_args = {
        PARA_DATASET: dataset_name,
        PARA_GEOCODER: geocoder,
        PARA_MAKE_CERTAIN: make_input_certain
    }
    # Address components are optional. Only those that are given are added
    # to the lens arguments.
    address_parts = (
        (PARA_HOUSE_NUMBER, house_nr),
        (PARA_STREET, street),
        (PARA_CITY, city),
        (PARA_STATE, state),
    )
    for key, value in address_parts:
        if value is not None:
            lens_args[key] = value
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_GEOCODE, lens_args)
Пример #27
0
def mimir_picker(dataset_name, schema, pick_as=None, make_input_certain=False):
    """Build the module specification for a Mimir Picker Lens.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    schema: list(dict)
        List of objects containing 'pickFrom' and 'pickAs' elements
    pick_as: optional
        Value for the pick-as argument of the lens. Only included in the
        arguments if not None.
    make_input_certain: bool, optional
        Flag indicating whether input should be made certain

    Returns
    -------
    vizier.workflow.module.ModuleSpecification
    """
    lens_args = {}
    lens_args[PARA_DATASET] = dataset_name
    lens_args[PARA_SCHEMA] = schema
    lens_args[PARA_MAKE_CERTAIN] = make_input_certain
    if pick_as is not None:
        lens_args[PARA_PICKAS] = pick_as
    return ModuleSpecification(PACKAGE_MIMIR, MIMIR_PICKER, lens_args)
Пример #28
0
 def test_validate_plot(self):
     """Test validation of plot cell command specifications."""
     # Two data series: one with label and range, one with the column only.
     series = [
         {
             u'series_column': u'A',
             u'series_label': 'Fatal',
             u'series_range': '0:20'
         },
         {u'series_column': u'B'}
     ]
     chart = {u'chartType': u'Bar Chart', u'chartGrouped': True}
     arguments = {
         u'series': series,
         u'chart': chart,
         u'name': u'My Chart',
         u'xaxis': {u'xaxis_column': u'Year'},
         u'dataset': u'accidents'
     }
     plot = ModuleSpecification('plot', 'CHART', arguments)
     # The specification is expected to pass validation without raising.
     cmd.validate_command(self.command_repository, plot)