def move_column(self, identifier: str, column_id: int, position: int, datastore: Datastore) -> VizualApiResult:
    """Relocate one column of a dataset to a new index position.

    The column definition and the matching value of every row are moved
    together and the result is stored as a new dataset. If the column is
    already at the target position the original dataset is returned.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    column_id: int
        Unique column identifier
    position: int
        Target position for the column
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset or column is unknown, or the target position is
        outside the schema bounds.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # A position equal to the number of columns is accepted as well.
    if not 0 <= position <= len(dataset.columns):
        raise ValueError("invalid target position '" + str(position) + "'")
    source_idx = dataset.get_index(column_id)
    if source_idx is None:
        raise ValueError("unknown column identifier '" + str(column_id) + "'")
    # Nothing to move: hand back the dataset as is.
    if source_idx == position:
        return VizualApiResult(dataset)
    reordered = list(dataset.columns)
    moved_column = reordered.pop(source_idx)
    reordered.insert(position, moved_column)
    rows = dataset.fetch_rows()
    for row in rows:
        cell = row.values.pop(source_idx)
        row.values.insert(position, cell)
    # Persisting the modified data yields a dataset with a new identifier.
    ds = datastore.create_dataset(columns=reordered, rows=rows, properties={})
    return VizualApiResult(ds)
def rename_column(self, identifier: str, column_id: int, name: str, datastore: Datastore) -> VizualApiResult:
    """Give a column of a dataset a new name.

    If the new name equals the current one (ignoring case) the original
    dataset is returned. Otherwise a new dataset with the renamed column is
    stored.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    column_id: int
        Unique column identifier
    name: string
        New column name
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the name is rejected by is_valid_name, or the dataset or column
        is unknown.
    """
    # Validate the name before touching the datastore.
    if not is_valid_name(name):
        raise ValueError("invalid column name '" + name + "'")
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    col_idx = dataset.get_index(column_id)
    if col_idx is None:
        raise ValueError("unknown column identifier '" + str(column_id) + "'")
    # Names are compared case-insensitively; a case-only change is a no-op.
    if dataset.columns[col_idx].name.lower() == name.lower():
        return VizualApiResult(dataset)
    columns = list(dataset.columns)
    old_column = columns[col_idx]
    columns[col_idx] = DatasetColumn(
        identifier=old_column.identifier,
        name=name,
        data_type=old_column.data_type,
    )
    # Persisting the modified schema yields a dataset with a new identifier.
    ds = datastore.create_dataset(
        columns=columns,
        rows=dataset.fetch_rows(),
        properties={},
    )
    return VizualApiResult(ds)
def move_row(self, identifier: str, row_id: str, position: int, datastore: Datastore) -> VizualApiResult:
    """Move a row of a dataset by sending a "moveRow" script to Mimir.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    row_id: string
        Identifier of the row that is moved
    position: int
        Target position for the row
    datastore : Datastore
        Datastore to retrieve datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If no dataset with the given identifier exists.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # Row and position are passed through unchecked; no range validation
    # happens in this method.
    command = dict(id="moveRow", row=row_id, position=position)
    return VizualApiResult.from_mimir(
        mimir.vizualScript(dataset.identifier, command)
    )
def delete_row(self, identifier: str, row_index: str, datastore: Datastore) -> VizualApiResult:
    """Remove the row at a given index and store the result as a new dataset.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    row_index: string
        Index of the row to delete (converted with int())
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset is unknown or the index is outside
        [0, dataset.row_count).
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    row_pos = int(row_index)
    if not 0 <= row_pos < dataset.row_count:
        raise ValueError("invalid row index '" + str(row_index) + "'")
    remaining = dataset.fetch_rows()
    del remaining[row_pos]
    # Persisting the reduced row list yields a dataset with a new identifier.
    ds = datastore.create_dataset(
        columns=dataset.columns,
        rows=remaining,
        properties={},
    )
    return VizualApiResult(ds)
def delete_row(self, identifier: str, row_index: str, datastore: Datastore) -> VizualApiResult:
    """Delete a row of a dataset by sending a "deleteRow" script to Mimir.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    row_index: string
        Row to delete; converted with int() before it is sent
    datastore : Datastore
        Datastore to retrieve datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If no dataset with the given identifier exists (or if row_index
        cannot be converted to an integer).
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError('unknown dataset \'' + identifier + '\'')
    assert isinstance(dataset, MimirDatasetHandle)
    # The script only needs the row; the list of database column names that
    # used to be collected here was never read and has been dropped.
    command = {"id": "deleteRow", "row": int(row_index)}
    response = mimir.vizualScript(dataset.identifier, command)
    return VizualApiResult.from_mimir(response)
def insert_row(self, identifier: str, position: int, datastore: Datastore) -> VizualApiResult:
    """Insert a row into a dataset by sending an "insertRow" script to Mimir.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    position: int
        Index position at which the row will be inserted
    datastore : Datastore
        Datastore to retrieve datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If no dataset with the given identifier exists.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # The position is forwarded as given; it is not range-checked here.
    command = dict(id="insertRow", position=position)
    return VizualApiResult.from_mimir(
        mimir.vizualScript(dataset.identifier, command)
    )
def delete_column(self, identifier: str, column_id: int, datastore: Datastore) -> VizualApiResult:
    """Delete a column of a dataset by sending a "deleteColumn" script to Mimir.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    column_id: int
        Unique column identifier
    datastore : Datastore
        Datastore to retrieve datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If no dataset with the given identifier exists. Unknown columns are
        presumably rejected by get_index_for_column (not visible here).
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # The script addresses the column by its schema position, which is not
    # guaranteed to equal the column identifier.
    command = dict(id="deleteColumn", column=get_index_for_column(dataset, column_id))
    response = mimir.vizualScript(dataset.identifier, command)
    return VizualApiResult.from_mimir(response, identifier)
def filter_columns(self, identifier: str, columns: List[int], names: List[str], datastore: Datastore) -> VizualApiResult:
    """Project a dataset onto a list of columns, optionally renaming them.

    The result contains the listed columns in the given order. Entry i of
    names is the new name for column i; None keeps the existing column
    object unchanged.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    columns: list(int)
        List of column identifiers for columns in the result.
    names: list(string)
        Optional new names for filtered columns.
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset or any of the listed columns is unknown.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    schema = []
    # Source index of every projected column, used to pick row values.
    val_filter = []
    for i, col_id in enumerate(columns):
        col_idx = dataset.get_index(col_id)
        if col_idx is None:
            raise ValueError("unknown column identifier '" + str(col_id) + "'")
        col = dataset.columns[col_idx]
        new_name = names[i]
        if new_name is None:
            schema.append(col)
        else:
            schema.append(
                DatasetColumn(
                    identifier=col.identifier,
                    name=new_name,
                    data_type=col.data_type,
                )
            )
        val_filter.append(col_idx)
    rows = [
        DatasetRow(
            identifier=row.identifier,
            values=[row.values[v_idx] for v_idx in val_filter],
        )
        for row in dataset.fetch_rows()
    ]
    # Persisting the projection yields a dataset with a new identifier.
    ds = datastore.create_dataset(columns=schema, rows=rows, properties={})
    return VizualApiResult(ds)
def update_cell(self, identifier, column_id, row_id, value, datastore):
    """Update one cell by creating a view with a CASE expression.

    The view selects every column unchanged except the target column,
    which is replaced by ``CASE WHEN <row id> = ... THEN <value> ELSE
    <old value> END``. The view is registered as a new dataset.

    Parameters
    ----------
    identifier : string
        Unique dataset identifier
    column_id: int
        Unique column identifier for updated cell
    row_id: int
        Unique row identifier
    value: string
        New cell value
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If no dataset with the given identifier exists.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    col_index = get_index_for_column(dataset, column_id)
    # The row identifier itself is not validated in this method.
    col_list = []
    for i, _ in enumerate(dataset.columns):
        col = dataset.columns[i]
        if i != col_index:
            col_list.append(col.name_in_rdb)
            continue
        try:
            val_stmt = col.to_sql_value(value)
            col_sql = val_stmt + " ELSE " + col.name_in_rdb + " END "
        except ValueError:
            # The value does not fit the column type: store it as text and
            # cast the existing values to varchar.
            # NOTE(review): value is embedded in the SQL text unescaped --
            # confirm quoting rules for the target SQL dialect.
            col_sql = (
                "'" + str(value) + "' ELSE CAST({{input}}."
                + col.name_in_rdb + " AS varchar) END "
            )
        rid_sql = MIMIR_ROWID_COL.to_sql_value(row_id)
        stmt = "CASE WHEN " + ROW_ID + " = " + rid_sql + " THEN "
        stmt = stmt + col_sql + "AS " + col.name_in_rdb
        col_list.append(stmt)
    sql = "SELECT " + ",".join(col_list) + " FROM " + dataset.table_name + ";"
    view_name, _ = mimir.createView(dataset.table_name, sql)
    # Register the view as a new dataset with the unchanged schema.
    ds = datastore.register_dataset(
        table_name=view_name,
        columns=dataset.columns,
        row_counter=dataset.row_counter,
        annotations=dataset.annotations,
    )
    return VizualApiResult(ds)
def move_column(self, identifier, column_id, position, datastore):
    """Move a column by reordering the dataset schema only.

    The underlying table is left untouched; a new dataset is registered for
    the same table with the reordered column list. If the column is already
    at the target position the original dataset is returned.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    column_id: int
        Unique column identifier
    position: int
        Target position for the column
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset is unknown or the target position is outside the
        schema bounds.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # A position equal to the number of columns is accepted as well.
    if not 0 <= position <= len(dataset.columns):
        raise ValueError("invalid target position '" + str(position) + "'")
    source_idx = get_index_for_column(dataset, column_id)
    if source_idx == position:
        return VizualApiResult(dataset)
    schema = list(dataset.columns)
    moved_column = schema.pop(source_idx)
    schema.insert(position, moved_column)
    # Registering the reordered schema yields a new dataset identifier.
    ds = datastore.register_dataset(
        table_name=dataset.table_name,
        columns=schema,
        row_counter=dataset.row_counter,
        annotations=dataset.annotations,
    )
    return VizualApiResult(ds)
def sort_dataset(self, identifier: str, columns: List[int], reversed: List[bool], datastore: Datastore) -> VizualApiResult:
    """Sort the rows of a dataset by one or more columns.

    columns lists the sort columns in order of significance; reversed
    holds one flag per entry of columns that reverses the order for that
    column. The sorted rows are stored as a new dataset.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    columns: list(int)
        List of column identifiers for sort columns.
    reversed: list(bool)
        Flags indicating whether the sort order of the corresponding
        column is reversed.
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset or any of the sort columns is unknown.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    rows = dataset.fetch_rows()
    # One sort pass per column, starting with the last (least significant)
    # column so that earlier columns take precedence in the final order.
    for l_idx in range(len(columns) - 1, -1, -1):
        col_id = columns[l_idx]
        col_idx = dataset.get_index(col_id)
        descending = reversed[l_idx]
        if col_idx is None:
            raise ValueError("unknown column identifier '" + str(col_id) + "'")
        rows.sort(
            key=lambda r: r.values[cast(int, col_idx)],
            reverse=descending,
        )
    # Persisting the sorted rows yields a dataset with a new identifier.
    ds = datastore.create_dataset(
        columns=dataset.columns,
        rows=rows,
        properties={},
    )
    return VizualApiResult(ds)
def insert_column(self, identifier, position, name, datastore):
    """Insert a new column by creating a view over the dataset table.

    The view selects all existing columns plus the new one, whose values
    come from ``CAST('' AS int)``. The view is registered as a new dataset
    with the extended schema.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    position: int
        Index position at which the column will be inserted
    name: string
        New column name
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the name is rejected by is_valid_name, the dataset is unknown,
        or the position is outside the schema bounds.
    """
    if not is_valid_name(name):
        raise ValueError("invalid column name '" + str(name) + "'")
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # A position equal to the number of columns appends the new column.
    if not 0 <= position <= len(dataset.columns):
        raise ValueError("invalid column index '" + str(position) + "'")
    # The new column gets the next unused column identifier.
    col_id = dataset.max_column_id() + 1
    schema = list(dataset.columns)
    new_column = MimirDatasetColumn(col_id, name, name)
    schema.insert(position, new_column)
    # Note: As of April 2018 this requires Mimir to run with the XNULL
    # option. Otherwise, setting all values in the new column to NULL may
    # cause an exception in some scenarios.
    col_list = [
        " CAST('' AS int) AS " + col.name_in_rdb
        if col.identifier == new_column.identifier
        else col.name_in_rdb
        for col in schema
    ]
    sql = "SELECT " + ",".join(col_list) + " FROM " + dataset.table_name + ";"
    view_name, _ = mimir.createView(dataset.table_name, sql)
    # Register the view as a new dataset with the extended schema.
    ds = datastore.register_dataset(
        table_name=view_name,
        columns=schema,
        row_counter=dataset.row_counter,
        annotations=dataset.annotations,
    )
    return VizualApiResult(ds)
def filter_columns(self, identifier: str, columns: List[int], names: List[str], datastore: Datastore) -> VizualApiResult:
    """Project a dataset onto a list of columns via a Mimir "projection" script.

    For every entry of columns the script receives the column's schema
    position and its output name. Entry i of names is the new name for
    column i; None keeps the current name.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    columns: list(int)
        List of column identifiers for columns in the result.
    names: list(string)
        Optional new names for filtered columns.
    datastore : Datastore
        Datastore to retrieve datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset is unknown or a new column name is rejected by
        is_valid_name.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError('unknown dataset \'' + identifier + '\'')
    column_mapping = []
    for i, col_id in enumerate(columns):
        col_idx = get_index_for_column(dataset, col_id)
        col = dataset.columns[col_idx]
        new_name = names[i]
        if new_name is not None:
            if not is_valid_name(new_name):
                raise ValueError('invalid column name \'' + str(new_name) + '\'')
            # Build the renamed column only to obtain its output name the
            # same way an unchanged column reports it.
            out_col = MimirDatasetColumn(
                identifier=col.identifier,
                name_in_dataset=new_name,
                name_in_rdb=new_name,
            )
        else:
            out_col = col
        column_mapping.append({
            "columns_column": col_idx,
            "columns_name": out_col.name
        })
    command = {"id": "projection", "columns": column_mapping}
    response = mimir.vizualScript(dataset.identifier, command)
    return VizualApiResult.from_mimir(response)
def move_row(self, identifier: str, row_id: str, position: int, datastore: Datastore) -> VizualApiResult:
    """Move a row within a given dataset.

    The row at index int(row_id) is moved to the target position and the
    result is stored as a new dataset. If the row is already at the target
    position the original dataset is returned unchanged.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    row_id: string
        Index of the row that is moved (converted with int())
    position: int
        Target position for the row
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset is unknown, or the source row or target position is
        not within the range of the dataset.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError('unknown dataset \'' + identifier + '\'')
    # Convert once so that all checks below compare integers. Comparing the
    # string row_id with the integer position is always unequal.
    source_idx = int(row_id)
    if source_idx < 0 or source_idx >= dataset.row_count:
        raise ValueError('invalid source row \'' + str(row_id) + '\'')
    # A position equal to the row count is accepted as well.
    if position < 0 or position > dataset.row_count:
        raise ValueError('invalid target position \'' + str(position) + '\'')
    # No need to do anything if source position equals target position
    if source_idx == position:
        return VizualApiResult(dataset)
    rows = dataset.fetch_rows()
    rows.insert(position, rows.pop(source_idx))
    # Store updated dataset to get new identifier
    ds = datastore.create_dataset(columns=dataset.columns, rows=rows, properties={})
    return VizualApiResult(ds)
def move_column(self, identifier: str, column_id: int, position: int, datastore: Datastore) -> VizualApiResult:
    """Move a column of a dataset by sending a "moveColumn" script to Mimir.

    If the column is already at the target position the original dataset
    is returned and no script is sent.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    column_id: int
        Unique column identifier
    position: int
        Target position for the column
    datastore : Datastore
        Datastore to retrieve datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset is unknown or the target position is outside the
        schema bounds.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # A position equal to the number of columns is accepted as well.
    if not 0 <= position <= len(dataset.columns):
        raise ValueError("invalid target position '" + str(position) + "'")
    source_idx = get_index_for_column(dataset, column_id)
    if source_idx == position:
        return VizualApiResult(dataset)
    # Keep the mimir-side schema aligned with the vizier-side schema.
    command = dict(id="moveColumn", column=source_idx, position=position)
    return VizualApiResult.from_mimir(
        mimir.vizualScript(dataset.identifier, command)
    )
def filter_columns(self, identifier, columns, names, datastore):
    """Project a dataset onto a list of columns by creating a view.

    The view selects the database columns of the listed dataset columns.
    Entry i of names is the new dataset-level name for column i; None
    keeps the existing column object. The database column name is never
    changed.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    columns: list(int)
        List of column identifiers for columns in the result.
    names: list(string)
        Optional new names for filtered columns.
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If no dataset with the given identifier exists.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    schema = []
    col_list = []
    for i, col_id in enumerate(columns):
        col_idx = get_index_for_column(dataset, col_id)
        col = dataset.columns[col_idx]
        new_name = names[i]
        if new_name is None:
            schema.append(col)
        else:
            schema.append(
                MimirDatasetColumn(
                    identifier=col.identifier,
                    name_in_dataset=new_name,
                    name_in_rdb=col.name_in_rdb,
                )
            )
        col_list.append(col.name_in_rdb)
    sql = "SELECT " + ",".join(col_list) + " FROM " + dataset.table_name + ";"
    view_name, _ = mimir.createView(dataset.table_name, sql)
    # Register the view; annotations are restricted to the kept columns.
    ds = datastore.register_dataset(
        table_name=view_name,
        columns=schema,
        row_counter=dataset.row_counter,
        annotations=dataset.annotations.filter(
            columns=columns,
            rows=dataset.row_ids,
        ),
    )
    return VizualApiResult(ds)
def move_row(self, identifier, row_index, position, datastore):
    """Move a row within a given dataset.

    If source and target position differ, the dataset is registered again
    under a new identifier; otherwise the original dataset is returned.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    row_index: int
        Index of the row that is moved
    position: int
        Target position for the row
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset is unknown, or the source row or target position is
        not within the range of the dataset.
    """
    # Get dataset. Raise exception if dataset is unknown
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError('unknown dataset \'' + identifier + '\'')
    # Make sure that row is within dataset bounds
    if row_index < 0 or row_index >= dataset.row_count:
        raise ValueError('invalid source row \'' + str(row_index) + '\'')
    # Make sure that position is a valid row index in the new dataset
    if position < 0 or position > dataset.row_count:
        raise ValueError('invalid target position \'' + str(position) + '\'')
    # No need to do anything if source position equals target position.
    # This guard was missing, which left a dangling `else` (syntax error).
    if row_index != position:
        # NOTE(review): no row reordering is applied in this branch; the
        # dataset is re-registered with the same table, columns, row
        # counter and annotations. Confirm how the new order is recorded.
        # Store updated dataset to get new identifier
        ds = datastore.register_dataset(
            table_name=dataset.table_name,
            columns=dataset.columns,
            row_counter=dataset.row_counter,
            annotations=dataset.annotations,
        )
        return VizualApiResult(ds)
    else:
        return VizualApiResult(dataset)
def materialize_dataset(self, identifier: str, datastore: Datastore) -> VizualApiResult:
    """Return the dataset unchanged; materialization is a no-op here.

    This backend (the FS backend) has nothing to materialize, so the
    dataset is only looked up and wrapped in a result.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    datastore : Datastore
        Datastore to retrieve datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If no dataset with the given identifier exists.
    """
    found = datastore.get_dataset(identifier)
    if found is not None:
        return VizualApiResult(found)
    raise ValueError("unknown dataset '" + identifier + "'")
def update_cell(self, identifier: str, column_id: int, row_id: str, value: str, datastore: Datastore) -> VizualApiResult:
    """Replace the value of one cell and store the result as a new dataset.

    The row is located by comparing int(row.identifier) with int(row_id).
    The matching row is replaced by a new DatasetRow carrying the updated
    value list.

    Parameters
    ----------
    identifier : string
        Unique dataset identifier
    column_id: int
        Unique column identifier for updated cell
    row_id: string
        Unique row identifier
    value: string
        New cell value
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If the dataset or column is unknown, or no row has the given
        identifier.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # Resolve the column before fetching rows so that an unknown column
    # fails early.
    col_idx = dataset.get_index(column_id)
    if col_idx is None:
        raise ValueError("unknown column identifier '" + str(column_id) + "'")
    rows = dataset.fetch_rows()
    # Index of the first row with a matching identifier, or -1 if none.
    row_index = next(
        (i for i, row in enumerate(rows) if int(row.identifier) == int(row_id)),
        -1,
    )
    if row_index < 0:
        raise ValueError("invalid row identifier '" + str(row_id) + "'")
    target = rows[row_index]
    updated_values = list(target.values)
    updated_values[col_idx] = value
    rows[row_index] = DatasetRow(identifier=target.identifier, values=updated_values)
    # Persisting the modified rows yields a dataset with a new identifier.
    ds = datastore.create_dataset(
        columns=dataset.columns,
        rows=rows,
        properties={},
    )
    return VizualApiResult(ds)
def sort_dataset(self, identifier: str, columns: List[int], reversed: List[bool], datastore: Datastore) -> VizualApiResult:
    """Sort a dataset by creating a Mimir view with an ORDER BY clause.

    columns lists the sort columns in order of significance; reversed
    holds one flag per entry of columns that appends DESC for that column.
    The result wraps the dataset handle built from the created view.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    columns: list(int)
        List of column identifiers for sort columns.
    reversed: list(bool)
        Flags indicating whether the sort order of the corresponding
        column is reversed.
    datastore : Datastore
        Datastore to retrieve datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If no dataset with the given identifier exists.
    """
    # Get dataset. The None check has to come before the type assertion,
    # otherwise an unknown dataset fails the assertion instead of raising
    # the documented ValueError.
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError('unknown dataset \'' + identifier + '\'')
    assert isinstance(dataset, MimirDatasetHandle)
    # Create order by clause based on columns and reversed flags
    order_by_clause = []
    for i, col_id in enumerate(columns):
        stmt = cast(MimirDatasetColumn, dataset.column_by_id(col_id)).name_in_rdb
        if reversed[i]:
            stmt += ' DESC'
        order_by_clause.append(stmt)
    sql = 'SELECT * FROM ' + dataset.identifier + ' ORDER BY '
    sql += ','.join(order_by_clause)
    # Only the view name, schema and properties of the result are needed.
    view_name, dependencies, schema, properties, functionDeps = mimir.createView(
        datasets={dataset.identifier: dataset.identifier},
        query=sql,
    )
    ds = MimirDatasetHandle.from_mimir_result(view_name, schema, properties)
    return VizualApiResult(ds)
def import_dataset(self, datastore: Datastore, project_id: str, dataset_id: str) -> VizualApiResult:
    """Look up an existing dataset after checking that the project exists.

    Parameters
    ----------
    datastore : Datastore
        Datastore to retrieve datasets
    project_id: string
        Identifier of the project; only checked for existence
    dataset_id: string
        Identifier of the dataset to return

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    Exception
        If the project or the dataset does not exist.
    """
    # NOTE(review): imported inside the function, presumably to avoid an
    # import cycle with the web service module -- confirm.
    from vizier.api.webservice.server import api

    # Mimir doesn't actually need the project ID (yet), but the project is
    # checked for safety anyway.
    if api.projects.projects.get_project(project_id) is None:
        raise Exception("No Such Project: {}".format(project_id))
    found = datastore.get_dataset(dataset_id)
    if found is None:
        raise Exception("No Such Dataset: {}".format(dataset_id))
    return VizualApiResult(found, {})
def sort_dataset(self, identifier, columns, reversed, datastore):
    """Sort a dataset by creating a view with an ORDER BY clause.

    columns lists the sort columns in order of significance; reversed
    holds one flag per entry of columns that appends DESC for that column.
    The view is registered as a new dataset with the unchanged schema.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    columns: list(int)
        List of column identifiers for sort columns.
    reversed: list(bool)
        Flags indicating whether the sort order of the corresponding
        column is reversed.
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult

    Raises
    ------
    ValueError
        If no dataset with the given identifier exists.
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # One ORDER BY term per sort column, with DESC where requested.
    order_by_clause = [
        dataset.column_by_id(col_id).name_in_rdb + (" DESC" if reversed[i] else "")
        for i, col_id in enumerate(columns)
    ]
    sql = "SELECT * FROM {{input}} ORDER BY " + ",".join(order_by_clause) + ";"
    view_name, _ = mimir.createView(dataset.table_name, sql)
    # Register the sorted view as a new dataset.
    ds = datastore.register_dataset(
        table_name=view_name,
        columns=dataset.columns,
        annotations=dataset.annotations,
    )
    return VizualApiResult(ds)
def insert_column(self, identifier: str, position: int, name: str, datastore: Datastore) -> VizualApiResult:
    """Add a new, empty column to a dataset at the given schema position.

    The new column receives the identifier max_column_id() + 1 and every
    existing row gets a None value for it. If no name is given the column
    name is the empty string.

    Raises ValueError if no dataset with given identifier exists, if the
    position is outside of the current schema bounds, or if a name is given
    that is not a valid column name.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    position: int
        Index position at which the column will be inserted
    name: string
        New column name (may be None)
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult
    """
    # A missing name is accepted. A name that is given has to be valid.
    if name is not None and not is_valid_name(name):
        raise ValueError("invalid column name '" + name + "'")
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # position == len(columns) is allowed and appends the column at the end
    if not 0 <= position <= len(dataset.columns):
        raise ValueError("invalid column index '" + str(position) + "'")
    # Work on a copy of the schema and on the fetched rows
    schema = list(dataset.columns)
    rows = dataset.fetch_rows()
    new_column = DatasetColumn(
        identifier=dataset.max_column_id() + 1,
        name='' if name is None else name
    )
    schema.insert(position, new_column)
    # Keep every row aligned with the extended schema
    for row in rows:
        row.values.insert(position, None)
    # Store the modified data as a new dataset to get a new identifier
    result = datastore.create_dataset(columns=schema, rows=rows, properties={})
    return VizualApiResult(result)
def materialize_dataset(self, identifier: str, datastore: Datastore) -> VizualApiResult:
    """Create a materialized snapshot of the dataset for faster execution.

    Raises ValueError if no dataset with given identifier exists.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    datastore : Datastore
        Datastore to retrieve the dataset from

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult
    """
    input_dataset = datastore.get_dataset(identifier)
    if input_dataset is None:
        raise ValueError('unknown dataset \'' + identifier + '\'')
    # typing.cast() only returns its argument; it has no effect unless the
    # result is bound to a name. Bind it so that the handle is treated as a
    # Mimir dataset handle in the remainder of the method.
    mimir_dataset = cast(MimirDatasetHandle, input_dataset)
    response = mimir.materialize(mimir_dataset.identifier)
    # The snapshot keeps schema and properties of the input dataset. Only the
    # identifier is taken from the response of the materialize request.
    output_ds = MimirDatasetHandle(
        identifier=response["name"],
        columns=cast(List[MimirDatasetColumn], mimir_dataset.columns),
        properties=mimir_dataset.get_properties(),
        name=mimir_dataset.name if mimir_dataset.name is not None else "untitled dataset"
    )
    return VizualApiResult(output_ds)
def insert_row(self, identifier, position, datastore):
    """Insert an empty row into a dataset.

    The new row is created by a view that appends a row of NULL values to
    the dataset table (UNION ALL). The view is registered as a new dataset
    with a row counter that is one higher than that of the input dataset.

    Raises ValueError if no dataset with given identifier exists or if the
    specified row position is outside the dataset bounds.

    NOTE(review): position is validated but is not used to place the row;
    the row is always added by the trailing UNION ALL branch.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    position: int
        Index position at which the row will be inserted
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult
    """
    # Get dataset. Raise exception if dataset is unknown
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError('unknown dataset \'' + identifier + '\'')
    # Make sure that position is a valid row index in the new dataset
    if position < 0 or position > len(dataset.row_ids):
        raise ValueError('invalid row index \'' + str(position) + '\'')
    # Counter value for the new dataset. Computed locally so that the handle
    # of the input dataset is not modified as a side effect.
    row_counter = dataset.row_counter + 1
    # Query that selects all columns of the current dataset
    col_list = [col.name_in_rdb for col in dataset.columns]
    sql = 'SELECT ' + ','.join(col_list) + ' FROM ' + dataset.table_name
    mimir_schema = mimir.getSchema(sql)
    # One typed NULL per column of the query schema. The first schema entry is
    # skipped -- presumably a row identifier column; TODO confirm.
    union_list = [
        'CAST(NULL AS ' + col['baseType'] + ') AS ' + col['name']
        for col in mimir_schema[1:]
    ]
    sql = '(' + sql + ') UNION ALL (SELECT ' + ','.join(union_list) + ');'
    # Only the name of the new view is needed; the dependencies are ignored
    view_name, _ = mimir.createView(dataset.table_name, sql)
    # Store updated dataset information with new identifier
    ds = datastore.register_dataset(
        table_name=view_name,
        columns=dataset.columns,
        row_counter=row_counter,
        annotations=dataset.annotations
    )
    return VizualApiResult(ds)
def unload_dataset(self, dataset: DatasetDescriptor, datastore: Datastore, filestore: Filestore, unload_format: str = 'csv', options: List[Dict[str, Any]] = None, resources: Dict[str, Any] = None) -> VizualApiResult:
    """Export (or unload) a dataset to a given file format.

    The dataset is written by the datastore into a file directory that is
    obtained from the filestore for a newly generated unique identifier. The
    value returned by the datastore is added to the result resources under
    the key base.RESOURCE_FILEID. If dataset is None nothing is exported and
    the result resources are empty.

    Parameters
    ----------
    dataset: DatasetDescriptor
        Descriptor of the dataset that is exported (may be None)
    datastore : Datastore
        Datastore that performs the export. Has to be a MimirDatastore.
    filestore: Filestore
        Filestore that provides the target directory. Has to be a
        FileSystemFilestore.
    unload_format: string, optional
        Format identifier
    options: list, optional
        Additional options that are forwarded to the unload_dataset call of
        the datastore. Defaults to an empty list.
    resources: dict, optional
        Dictionary of additional resources (i.e., key,value pairs) that were
        generated during a previous execution of the associated module. Not
        used by this implementation.

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult
    """
    # Use a fresh list per call instead of a list literal in the signature,
    # which would be shared between all calls that omit the argument.
    if options is None:
        options = []
    result_resources = dict()
    # Narrow the generic store types to the implementations whose methods are
    # called below.
    assert (isinstance(datastore, MimirDatastore))
    assert (isinstance(filestore, FileSystemFilestore))
    if dataset is not None:
        f_handles = datastore.unload_dataset(
            filepath=filestore.get_file_dir(get_unique_identifier()),
            dataset_name=dataset.identifier,
            format=unload_format,
            options=options
        )
        result_resources[base.RESOURCE_FILEID] = f_handles
    return VizualApiResult(dataset=dataset, resources=result_resources)
def empty_dataset(
    self,
    datastore: Datastore,
    filestore: Filestore,
    initial_columns: List[Tuple[str, str]] = None
) -> VizualApiResult:
    """Create a new dataset that contains a single row of default values.

    The dataset has one column of type "varchar" for each entry in
    initial_columns. Column identifiers are the list positions of the
    entries. The only row has the identifier "1" and holds the default value
    of every column. The dataset is created with the human readable name
    "Empty Table".

    Parameters
    ----------
    datastore : Datastore
        Datastore in which the dataset is created. Has to be a
        MimirDatastore.
    filestore: Filestore
        Not used by this implementation.
    initial_columns: list(tuple(string, string)), optional
        List of (default value, column name) pairs. Defaults to a single
        column "unnamed_column" with default value "''" (presumably the SQL
        literal for the empty string -- confirm).

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult
    """
    # Build the default per call instead of sharing a list literal that is
    # part of the signature.
    if initial_columns is None:
        initial_columns = [("''", "unnamed_column")]
    assert (isinstance(datastore, MimirDatastore))
    ds = datastore.create_dataset(
        columns=[
            MimirDatasetColumn(
                identifier=col_id,
                name_in_dataset=col_name,
                data_type="varchar"
            )
            for col_id, (_, col_name) in enumerate(initial_columns)
        ],
        # Exactly one row (identifier "1") that holds all default values
        rows=[
            DatasetRow(
                identifier=str(row_id),
                values=[default for default, _ in initial_columns]
            )
            for row_id in range(1, 2)
        ],
        human_readable_name="Empty Table",
    )
    return VizualApiResult(dataset=ds)
def delete_column(self, identifier, column_id, datastore):
    """Delete a column in a given dataset.

    The column is removed from the schema and the corresponding value is
    removed from every row. Dataset annotations are filtered to the remaining
    columns. The result is stored as a new dataset.

    Raises ValueError if no dataset with given identifier exists or if the
    specified column is unknown.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    column_id: int
        Unique column identifier
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult
    """
    # Get dataset. Raise exception if dataset is unknown
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError('unknown dataset \'' + identifier + '\'')
    # Get the index of the specified column that is to be deleted.
    col_index = dataset.get_index(column_id)
    if col_index is None:
        raise ValueError('unknown column identifier \'' + str(column_id) + '\'')
    # Delete column from a copy of the schema
    columns = list(dataset.columns)
    del columns[col_index]
    # Delete the value of the deleted column from every row
    rows = dataset.fetch_rows()
    for row in rows:
        del row.values[col_index]
    # Store updated dataset to get new identifier. Only annotations for the
    # remaining columns are kept.
    ds = datastore.create_dataset(
        columns=columns,
        rows=rows,
        annotations=dataset.annotations.filter(
            columns=[c.identifier for c in columns]
        )
    )
    return VizualApiResult(ds)
def insert_column(self, identifier: str, position: int, name: str, datastore: Datastore) -> VizualApiResult:
    """Insert column with given name at given position in dataset.

    The insert itself is executed by Mimir: an "insertColumn" command is sent
    for the dataset and the result is created from the response.

    Raises ValueError if no dataset with given identifier exists, if the
    specified column position is outside of the current schema bounds, or if
    the column name is invalid.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    position: int
        Index position at which the column will be inserted
    name: string
        New column name. The name is always validated with is_valid_name()
        (NOTE(review): whether None passes that check is not visible here).
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult
    """
    # Raise ValueError if given column name is invalid
    if not is_valid_name(name):
        raise ValueError('invalid column name \'' + str(name) + '\'')
    # Get dataset. Raise exception if dataset is unknown
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError('unknown dataset \'' + identifier + '\'')
    # Make sure that position is a valid column index in the new dataset
    if position < 0 or position > len(dataset.columns):
        raise ValueError('invalid column index \'' + str(position) + '\'')
    # The schema change is delegated to Mimir. No local copy of the modified
    # schema is required because the result is built from the response.
    command = {"id": "insertColumn", "name": name, "position": position}
    response = mimir.vizualScript(dataset.identifier, command)
    return VizualApiResult.from_mimir(response)
def delete_column(self, identifier, column_id, datastore):
    """Remove a column from a dataset by creating a view without it.

    The view selects all remaining columns from the table of the dataset. It
    is registered as a new dataset that keeps the row counter and the
    annotations of the input dataset.

    Raises ValueError if no dataset with given identifier exists. The column
    is resolved by get_index_for_column() (NOTE(review): its behavior for an
    unknown column is not visible from this method -- confirm).

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    column_id: int
        Unique column identifier
    datastore : Datastore
        Datastore to retrieve and update datasets

    Returns
    -------
    vizier.engine.packages.vizual.api.VizualApiResult
    """
    dataset = datastore.get_dataset(identifier)
    if dataset is None:
        raise ValueError("unknown dataset '" + identifier + "'")
    # Schema position of the column that is dropped
    drop_at = get_index_for_column(dataset, column_id)
    # Remaining schema (copy of the dataset schema without the column)
    remaining = list(dataset.columns)
    del remaining[drop_at]
    # Project the dataset table onto the remaining columns
    projection = ','.join(col.name_in_rdb for col in remaining)
    sql = 'SELECT ' + projection + ' FROM ' + dataset.table_name + ';'
    # Only the name of the new view is needed; the dependencies are ignored
    view_name, _ = mimir.createView(dataset.table_name, sql)
    # Register the view as a new dataset with the reduced schema
    result = datastore.register_dataset(
        table_name=view_name,
        columns=remaining,
        row_counter=dataset.row_counter,
        annotations=dataset.annotations
    )
    return VizualApiResult(result)