def test_mimir_domain(self):
    """Check the external form of the Mimir domain lens command and that
    incomplete argument lists are rejected.
    """
    # A complete command is rendered into its external (text) form
    command = mimir_domain(
        dataset_name='ds',
        column=1,
        materialize_input=False,
        validate=True
    )
    external_form = command.to_external_form(
        command=PACKAGE.get(mimir.MIMIR_DOMAIN),
        datasets=DATASETS
    )
    self.assertEqual(external_form, 'DOMAIN FOR Street IN ds')
    # Argument list without the dataset argument
    with self.assertRaises(ValueError):
        md.ModuleCommand(
            mimir.PACKAGE_MIMIR,
            mimir.MIMIR_DOMAIN,
            arguments=[
                md.ARG(id=pckg.PARA_COLUMN, value=1),
                md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=False)
            ],
            packages={mimir.PACKAGE_MIMIR: PACKAGE}
        )
    # Argument list without the column argument
    with self.assertRaises(ValueError):
        md.ModuleCommand(
            mimir.PACKAGE_MIMIR,
            mimir.MIMIR_DOMAIN,
            arguments=[
                md.ARG(id=pckg.PARA_DATASET, value='DS'),
                md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=False)
            ],
            packages={mimir.PACKAGE_MIMIR: PACKAGE}
        )
def unload_dataset(dataset_name, unload_format='csv', options=None, validate=False):
    """Create instance of unload dataset command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    unload_format: string, optional
        Format identifier. Omitted from the command if None.
    options: list, optional
        Additional unload options. Omitted from the command if None.
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.ModuleCommand
    """
    # The dataset name is the only mandatory argument
    args = [md.ARG(id=pckg.PARA_DATASET, value=dataset_name)]
    if unload_format is not None:
        args.append(md.ARG(id=vizual.PARA_UNLOAD_FORMAT, value=unload_format))
    if options is not None:
        args.append(md.ARG(id=vizual.PARA_UNLOAD_OPTIONS, value=options))
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_UNLOAD,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def mimir_missing_key(dataset_name, column, materialize_input=False, validate=False):
    """Create instance of mimir missing key lens command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column
    materialize_input: bool, optional
        Flag indicating whether input should be materialized
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=pckg.PARA_COLUMN, value=column),
        md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=materialize_input)
    ]
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_MISSING_KEY,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def mimir_shape_detector(dataset_name, model_name, validate=False):
    """Create instance of mimir shape detector command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    model_name: string
        Name of the mimir model that gets created or compared
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=mimir.PARA_MODEL_NAME, value=model_name)
    ]
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_SHAPE_DETECTOR,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def r_cell(source, validate=False):
    """Get command specification for a R cell. Primarily intended for unit
    tests.

    Parameters
    ----------
    source: string
        R code for cell body
    validate: bool, optional
        If true, the command is validated

    Returns
    -------
    vizier.viztrail.command.ModuleCommand
    """
    # A package index with the r package declaration is only created when
    # validation is requested
    packages = (
        {r.PACKAGE_R: pckg.PackageIndex(r.R_COMMANDS)} if validate else None
    )
    return md.ModuleCommand(
        r.PACKAGE_R,
        r.R_CODE,
        arguments=[md.ARG(id=r.PARA_R_SOURCE, value=source)],
        packages=packages
    )
def move_column(dataset_name, column, position, validate=False):
    """Create instance of move column command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column that is being moved
    position: int
        Index position where column is moved to
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=pckg.PARA_COLUMN, value=column),
        md.ARG(id=vizual.PARA_POSITION, value=position)
    ]
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_MOV_COL,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def update_cell(dataset_name, column, row, value, validate=False):
    """Create instance of update dataset cell command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: int
        Cell column identifier
    row: int
        Unique row identifier for cell
    value: string
        New cell value
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=pckg.PARA_COLUMN, value=column),
        md.ARG(id=vizual.PARA_ROW, value=row),
        md.ARG(id=vizual.PARA_VALUE, value=value)
    ]
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_UPD_CELL,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def insert_column(dataset_name, position, name, validate=False):
    """Create instance of insert column command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    position: int
        Index position where column is inserted
    name: string
        New column name
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=vizual.PARA_POSITION, value=position),
        md.ARG(id=pckg.PARA_NAME, value=name)
    ]
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_INS_COL,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def mimir_type_inference(
    dataset_name, percent_conform, materialize_input=False, validate=False
):
    """Create instance of mimir type inference lens command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    percent_conform: float
        Percent that conforms
    materialize_input: bool, optional
        Flag indicating whether input should be materialized
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=mimir.PARA_PERCENT_CONFORM, value=percent_conform),
        md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=materialize_input)
    ]
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_TYPE_INFERENCE,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def sort_dataset(dataset_name, columns, validate=False):
    """Create instance of sort dataset command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    columns: list
        List of column references. Expects a list of dictionaries with two
        elements 'column' and 'order'
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.ModuleCommand
    """
    # Each sort column becomes one record with a column and an order argument
    sort_columns = [
        [
            md.ARG(id=vizual.PARA_COLUMNS_COLUMN, value=col['column']),
            md.ARG(id=vizual.PARA_COLUMNS_ORDER, value=col['order'])
        ]
        for col in columns
    ]
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=vizual.PARA_COLUMNS, value=sort_columns)
    ]
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_SORT,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def rename_column(dataset_name, column, name, validate=False):
    """Create instance of rename dataset column command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: string or int
        Name or index for column that is being renamed
    name: string
        New column name
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=pckg.PARA_COLUMN, value=column),
        md.ARG(id=pckg.PARA_NAME, value=name)
    ]
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_REN_COL,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def move_row(dataset_name, row, position, validate=False):
    """Create instance of move row command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    row: int
        Index of row that is being moved
    position: int
        Index position where row is moved
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=vizual.PARA_ROW, value=row),
        md.ARG(id=vizual.PARA_POSITION, value=position)
    ]
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_MOV_ROW,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def sql_cell(source, output_dataset=None, validate=False):
    """Get command specification for a SQL cell. Primarily intended for unit
    tests.

    Parameters
    ----------
    source: string
        SQL code for cell body
    output_dataset: string, optional
        Optional dataset name. If given result is materialized as new dataset.
    validate: bool, optional
        If true, the command is validated

    Returns
    -------
    vizier.viztrail.command.ModuleCommand
    """
    # A package index with the SQL package declaration is only created when
    # validation is requested
    packages = (
        {sql.PACKAGE_SQL: pckg.PackageIndex(sql.SQL_COMMANDS)}
        if validate else None
    )
    args = [md.ARG(id=sql.PARA_SQL_SOURCE, value=source)]
    # The output dataset argument is only part of the command if given
    if output_dataset is not None:
        args.append(md.ARG(id=sql.PARA_OUTPUT_DATASET, value=output_dataset))
    return md.ModuleCommand(
        sql.PACKAGE_SQL,
        sql.SQL_QUERY,
        arguments=args,
        packages=packages
    )
def load_dataset(dataset_name, file, detect_headers=None, infer_types=None, load_format='csv', options=None, validate=False):
    """Create instance of load dataset command.

    Parameters
    ----------
    dataset_name: string
        Name for the new dataset
    file: dict
        Dictionary containing at least one of 'fileId' or 'url' and optional
        'userName' and 'password'.
    detect_headers: bool, optional
        Detect column names in loaded file if True
    infer_types: bool, optional
        Infer column types for loaded dataset if True
    load_format: string, optional
        Format identifier
    options: list, optional
        Additional options for Mimirs load command
    validate: bool, optional
        Currently unused. The command is always created with packages=None.

    Returns
    -------
    vizier.engine.module.ModuleCommand
    """
    # Mandatory arguments: source file and name of the new dataset
    args = [
        md.ARG(id=vizual.PARA_FILE, value=file),
        md.ARG(id=pckg.PARA_NAME, value=dataset_name)
    ]
    # Optional arguments are only part of the command if a value is given
    if detect_headers is not None:
        args.append(
            md.ARG(id=vizual.PARA_DETECT_HEADERS, value=detect_headers)
        )
    if infer_types is not None:
        args.append(md.ARG(id=vizual.PARA_INFER_TYPES, value=infer_types))
    if load_format is not None:
        args.append(md.ARG(id=vizual.PARA_LOAD_FORMAT, value=load_format))
    if options is not None:
        args.append(md.ARG(id=vizual.PARA_LOAD_OPTIONS, value=options))
    # NOTE: no package index is passed here regardless of the validate flag
    # (the call to PACKAGE(validate=validate) is disabled).
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_LOAD,
        arguments=args,
        packages=None
    )
def mimir_comment(
    dataset_name, comments, result_columns, materialize_input=False,
    validate=False
):
    """Create instance of mimir comment lens command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    comments: list(dict)
        List of objects containing 'expression', 'comment' and 'rowid'
        elements
    result_columns: list(dict)
        List of objects containing 'column' elements for output
    materialize_input: bool, optional
        Flag indicating whether input should be materialized
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand

    Raises
    ------
    KeyError
        If a comment lacks one of the 'expression', 'comment' or 'rowid'
        elements, or a result column lacks the 'column' element
    """
    # Collect the converted comments under their own name. Re-assigning the
    # 'comments' parameter to an empty list would discard the caller's input
    # and always produce a command without any comments.
    comment_records = [
        [
            md.ARG(id=mimir.PARA_EXPRESSION, value=comment['expression']),
            md.ARG(id=mimir.PARA_COMMENT, value=comment['comment']),
            md.ARG(id=mimir.PARA_ROWID, value=comment['rowid'])
        ]
        for comment in comments
    ]
    # Each result column becomes a record with a single column argument
    result_cols = [
        [md.ARG(id=pckg.PARA_COLUMN, value=col['column'])]
        for col in result_columns
    ]
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_COMMENT,
        arguments=[
            md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
            md.ARG(id=mimir.PARA_COMMENTS, value=comment_records),
            md.ARG(id=mimir.PARA_RESULT_COLUMNS, value=result_cols),
            md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=materialize_input)
        ],
        packages=PACKAGE(validate=validate)
    )
def drop_dataset(dataset_name, validate=False):
    """Create instance of drop dataset command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.ModuleCommand
    """
    args = [md.ARG(id=pckg.PARA_DATASET, value=dataset_name)]
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_DROP_DS,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def mimir_geocode(
    dataset_name, geocoder, house_nr=None, street=None, city=None, state=None,
    materialize_input=False, validate=False
):
    """Create instance of mimir geocode lens command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    geocoder: string
        Value for the geocoder argument (presumably the identifier of the
        geocoding service; confirm against the package declaration)
    house_nr: string or int, optional
        Value for the house number argument (presumably a column reference;
        TODO confirm). Omitted from the command if None.
    street: string or int, optional
        Value for the street argument. Omitted from the command if None.
    city: string or int, optional
        Value for the city argument. Omitted from the command if None.
    state: string or int, optional
        Value for the state argument. Omitted from the command if None.
    materialize_input: bool, optional
        Flag indicating whether input should be materialized
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand
    """
    # Mandatory arguments
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=mimir.PARA_GEOCODER, value=geocoder),
        md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=materialize_input)
    ]
    # Address components are only added if a value is given
    if house_nr is not None:
        args.append(md.ARG(id=mimir.PARA_HOUSE_NUMBER, value=house_nr))
    if street is not None:
        args.append(md.ARG(id=mimir.PARA_STREET, value=street))
    if city is not None:
        args.append(md.ARG(id=mimir.PARA_CITY, value=city))
    if state is not None:
        args.append(md.ARG(id=mimir.PARA_STATE, value=state))
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_GEOCODE,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def mimir_picker(
    dataset_name, schema, pick_as=None, materialize_input=False,
    validate=False
):
    """Create instance of mimir picker lens command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    schema: list(dict)
        List of dictionaries containing 'pickFrom' elements
    pick_as: string, optional
        Optional output column name
    materialize_input: bool, optional
        Flag indicating whether input should be materialized
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand
    """
    # Each schema entry becomes a record with a single pickFrom argument
    pick_from = [
        [md.ARG(id=mimir.PARA_PICKFROM, value=col['pickFrom'])]
        for col in schema
    ]
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=mimir.PARA_SCHEMA, value=pick_from),
        md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=materialize_input)
    ]
    # The output column name is only part of the command if given
    if pick_as is not None:
        args.append(md.ARG(id=mimir.PARA_PICKAS, value=pick_as))
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_PICKER,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def mimir_missing_value(dataset_name, columns, materialize_input=False, validate=False):
    """Create instance of mimir missing value lens command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    columns: list(dict)
        List of dictionaries containing at least entry 'column' and optional
        'constraint'
    materialize_input: bool, optional
        Flag indicating whether input should be materialized
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand
    """
    column_records = []
    for entry in columns:
        record = [md.ARG(id=pckg.PARA_COLUMN, value=entry['column'])]
        # The value constraint is optional for each column
        if 'constraint' in entry:
            record.append(
                md.ARG(
                    id=mimir.PARA_COLUMNS_CONSTRAINT,
                    value=entry['constraint']
                )
            )
        column_records.append(record)
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_MISSING_VALUE,
        arguments=[
            md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
            md.ARG(id=mimir.PARA_COLUMNS, value=column_records),
            md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=materialize_input)
        ],
        packages=PACKAGE(validate=validate)
    )
def mimir_schema_matching(
    dataset_name, schema, result_name, materialize_input=False,
    validate=False
):
    """Create instance of mimir schema matching lens command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    schema: list(dict)
        List of objects containing 'column' and 'type' elements
    result_name: string
        Value for the result dataset argument
    materialize_input: bool, optional
        Flag indicating whether input should be materialized
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand
    """
    # Each schema entry becomes a record with a column name and a type
    schema_records = [
        [
            md.ARG(id=mimir.PARA_COLUMN_NAME, value=col['column']),
            md.ARG(id=mimir.PARA_TYPE, value=col['type'])
        ]
        for col in schema
    ]
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=mimir.PARA_SCHEMA, value=schema_records),
        md.ARG(id=mimir.PARA_RESULT_DATASET, value=result_name),
        md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=materialize_input)
    ]
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_SCHEMA_MATCHING,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def delete_row(dataset_name, row, validate=False):
    """Create instance of delete dataset row command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    row: int
        Index for row that is being deleted
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.command.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=vizual.PARA_ROW, value=row)
    ]
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_DEL_ROW,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def delete_column(dataset_name, column, validate=False):
    """Create instance of delete dataset column command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    column: int
        Column identifier
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.engine.module.command.ModuleCommand
    """
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=pckg.PARA_COLUMN, value=column)
    ]
    return md.ModuleCommand(
        vizual.PACKAGE_VIZUAL,
        vizual.VIZUAL_DEL_COL,
        arguments=args,
        packages=PACKAGE(validate=validate)
    )
def create_plot(dataset_name, chart_name, series, chart_type='Bar Chart', chart_grouped=False, xaxis_range=None, xaxis_column=None, validate=False):
    """Create an instance of a create plot command.

    Parameters
    ----------
    dataset_name: string
        Dataset name
    chart_name: string
        Name of the chart
    series: list
        Specification of data series. Each data series is specified by a
        dictionary that contains the mandatory element 'column' and the
        optional elements 'range' and 'label'
    chart_type: string
        Identifier for chart type
    chart_grouped: bool
        Group multiple series into a single chart
    xaxis_range: string, optional
        Column value range definition
    xaxis_column: int, optional
        Column identifier
    validate: bool, optional
        If true, the command is validated

    Returns
    -------
    vizier.engine.viztrail.command.ModuleCommand
    """
    # A package index with the plot package declaration is only created when
    # validation is requested
    packages = (
        {plot.PACKAGE_PLOT: pckg.PackageIndex(plot.PLOT_COMMANDS)}
        if validate else None
    )
    # One record per data series. Label and range are optional.
    series_records = []
    for spec in series:
        record = [md.ARG(id=plot.PARA_SERIES_COLUMN, value=spec['column'])]
        if 'label' in spec:
            record.append(
                md.ARG(id=plot.PARA_SERIES_LABEL, value=spec['label'])
            )
        if 'range' in spec:
            record.append(
                md.ARG(id=plot.PARA_SERIES_RANGE, value=spec['range'])
            )
        series_records.append(record)
    chart = [
        md.ARG(plot.PARA_CHART_TYPE, value=chart_type),
        md.ARG(id=plot.PARA_CHART_GROUPED, value=chart_grouped)
    ]
    args = [
        md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
        md.ARG(id=pckg.PARA_NAME, value=chart_name),
        md.ARG(id=plot.PARA_SERIES, value=series_records),
        md.ARG(id=plot.PARA_CHART, value=chart)
    ]
    # The x-axis record is only added if at least one of its two elements
    # is given, i.e., if the collected list is not empty
    xaxis = []
    if xaxis_column is not None:
        xaxis.append(md.ARG(id=plot.PARA_XAXIS_COLUMN, value=xaxis_column))
    if xaxis_range is not None:
        xaxis.append(md.ARG(id=plot.PARA_XAXIS_RANGE, value=xaxis_range))
    if xaxis:
        args.append(md.ARG(id=plot.PARA_XAXIS, value=xaxis))
    return md.ModuleCommand(
        package_id=plot.PACKAGE_PLOT,
        command_id=plot.PLOT_SIMPLE_CHART,
        arguments=args,
        packages=packages
    )
def mimir_shape_detector(dataset_name, model_name, validate=False):
    """Create instance of mimir shape detector command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    model_name: string
        Name of the mimir model that gets created or compared
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand
    """
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_SHAPE_DETECTOR,
        arguments=[
            md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
            md.ARG(id=mimir.PARA_MODEL_NAME, value=model_name)
        ],
        packages=PACKAGE(validate=validate)
    )


def mimir_comment(
    dataset_name, comments, result_columns, materialize_input=False,
    validate=False
):
    """Create instance of mimir comment lens command.

    Parameters
    ----------
    dataset_name: string
        Name of the dataset
    comments: list(dict)
        List of objects containing 'expression', 'comment' and 'rowid'
        elements
    result_columns: list(dict)
        List of objects containing 'column' elements for output
    materialize_input: bool, optional
        Flag indicating whether input should be materialized
    validate: bool, optional
        Validate the created command specification (if true)

    Returns
    -------
    vizier.viztrail.module.ModuleCommand

    Raises
    ------
    KeyError
        If a comment lacks one of the 'expression', 'comment' or 'rowid'
        elements, or a result column lacks the 'column' element
    """
    # Collect the converted comments under their own name. Re-assigning the
    # 'comments' parameter to an empty list would discard the caller's input.
    comment_records = [
        [
            md.ARG(id=mimir.PARA_EXPRESSION, value=comment['expression']),
            md.ARG(id=mimir.PARA_COMMENT, value=comment['comment']),
            md.ARG(id=mimir.PARA_ROWID, value=comment['rowid'])
        ]
        for comment in comments
    ]
    # Each result column becomes a record with a single column argument
    result_cols = [
        [md.ARG(id=pckg.PARA_COLUMN, value=col['column'])]
        for col in result_columns
    ]
    # Pass the lists that were built above to the command arguments
    return md.ModuleCommand(
        mimir.PACKAGE_MIMIR,
        mimir.MIMIR_COMMENT,
        arguments=[
            md.ARG(id=pckg.PARA_DATASET, value=dataset_name),
            md.ARG(id=mimir.PARA_COMMENTS, value=comment_records),
            md.ARG(id=mimir.PARA_RESULT_COLUMNS, value=result_cols),
            md.ARG(id=mimir.PARA_MATERIALIZE_INPUT, value=materialize_input)
        ],
        packages=PACKAGE(validate=validate)
    )


#
# ------------------------------------------------------------------------------
# Helper Methods
# ------------------------------------------------------------------------------

def PACKAGE(validate=False):
    """Get the package dictionary that is passed to a command, depending on
    the validate flag. The dictionary contains the mimir package declaration
    if validate is true. Otherwise the result is None.

    Parameters
    ----------
    validate: bool, optional
        If true, a dictionary with the mimir package index is returned

    Returns
    -------
    dict
    """
    if not validate:
        return None
    return {mimir.PACKAGE_MIMIR: pckg.PackageIndex(mimir.MIMIR_LENSES)}