def get_dataset(self, name):
    """Return a client for the dataset registered under the given name.

    Raises ValueError if the specified dataset does not exist.

    Parameters
    ----------
    name : string
        Unique dataset name (matched case-insensitively for access
        tracking)

    Returns
    -------
    vizier.datastore.client.DatasetClient
    """
    key = name.lower()
    # The access is logged before any lookup happens, so reads of unknown
    # datasets are tracked as well. Names that have already been written
    # are not logged as reads.
    if key not in self.write:
        self.read.add(key)
    # Resolve the name to a datastore identifier. This is expected to raise
    # if the name is unknown.
    identifier = self.get_dataset_identifier(name)
    dataset = self.datastore.get_dataset(identifier)
    if dataset is not None:
        return DatasetClient(
            dataset=dataset,
            client=self,
            existing_name=key
        )
    raise ValueError('unknown dataset \'' + identifier + '\'')
def update_dataset(self, name, dataset):
    """Update a given dataset.

    Writes the columns, rows and annotations of the given dataset as a new
    dataset in the datastore and re-points the name at it. Columns with a
    negative identifier are assigned fresh identifiers (in place, on the
    given column objects) that continue after the largest column identifier
    of the currently stored dataset. Rows are passed through unchanged.

    Raises ValueError if the specified dataset does not exist.

    Parameters
    ----------
    name : string
        Unique dataset name
    dataset : vizier.datastore.base.Dataset
        Dataset object

    Returns
    -------
    vizier.datastore.client.DatasetClient
    """
    # Get identifier for the dataset with the given name. Will raise an
    # exception if the name is unknown
    identifier = self.get_dataset_identifier(name)
    # Read dataset from datastore to get the column counter.
    source_dataset = self.datastore.get_dataset(identifier)
    if source_dataset is None:
        # Record access to the datasets
        self.read.add(name.lower())
        raise ValueError('unknown dataset \'' + identifier + '\'')
    column_counter = source_dataset.max_column_id() + 1
    columns = dataset.columns
    rows = dataset.rows
    # Ensure that all columns have a non-negative identifier
    for col in columns:
        if col.identifier < 0:
            col.identifier = column_counter
            column_counter += 1
    # Gather up the read dependencies so that we can pass them to the
    # datastore and at least track coarse grained provenance.
    # TODO: we are assuming a mimir dataset and datastore here and need to
    # generalize this
    read_dep = []
    for dept_name in self.read:
        if not isinstance(dept_name, str):
            raise RuntimeError('invalid read name')
        # The read set also contains names of datasets that were requested
        # but never existed. Such entries cannot contribute a dependency
        # and must not make an unrelated update fail.
        try:
            dept_id = self.get_dataset_identifier(dept_name)
        except ValueError:
            # NOTE(review): ValueError is the type callers in this file
            # expect for unknown names -- confirm against the helper.
            continue
        dept_dataset = self.datastore.get_dataset(dept_id)
        if dept_dataset is None:
            continue
        read_dep.append(dept_dataset.table_name)
    # Write dataset to datastore and add new dataset to context
    ds = self.datastore.create_dataset(
        columns=columns,
        rows=rows,
        annotations=dataset.annotations,
        dependencies=read_dep
    )
    self.set_dataset_identifier(name, ds.identifier)
    self.descriptors[ds.identifier] = ds
    return DatasetClient(dataset=self.datastore.get_dataset(ds.identifier))
def new_dataset(self) -> DatasetClient:
    """Create a dataset client for a dataset that does not exist yet.

    The returned client is constructed without a dataset and is bound to
    this client object.

    Returns
    -------
    vizier.datastore.client.DatasetClient
    """
    fresh_client = DatasetClient(client=self)
    return fresh_client
def create_dataset(self, name, dataset, backend_options=None):
    """Create a new dataset with given name.

    Columns and rows with a negative identifier are assigned fresh
    identifiers (in place, on the given objects) that continue after the
    largest identifier present in the given dataset.

    Raises ValueError if a dataset with given name already exist or if the
    name is not valid.

    Parameters
    ----------
    name : string
        Unique dataset name
    dataset : vizier.datastore.client.DatasetClient
        Dataset object
    backend_options : list, optional
        Passed through to the datastore's create_dataset call. Defaults to
        a new empty list for every call.

    Returns
    -------
    vizier.datastore.client.DatasetClient
    """
    # Use a fresh list per call instead of a shared mutable default.
    if backend_options is None:
        backend_options = []
    # Raise an exception if a dataset with the given name already exists or
    # if the name is not valid
    if self.has_dataset_identifier(name):
        # Record access to the datasets
        self.read.add(name.lower())
        raise ValueError('dataset \'' + name + '\' already exists')
    if not is_valid_name(name):
        raise ValueError('invalid dataset name \'' + name + '\'')
    # Create list of columns for new dataset. Ensure that every column has
    # a non-negative identifier
    columns = []
    source_columns = dataset.columns
    if len(source_columns) > 0:
        column_counter = max(
            max(col.identifier for col in source_columns) + 1,
            0
        )
        for col in source_columns:
            if col.identifier < 0:
                col.identifier = column_counter
                column_counter += 1
            columns.append(
                DatasetColumn(
                    identifier=col.identifier,
                    name=col.name,
                    data_type=col.data_type
                )
            )
    rows = dataset.rows
    if len(rows) > 0:
        # Ensure that all rows have a non-negative identifier
        row_counter = max(max(row.identifier for row in rows) + 1, 0)
        for row in rows:
            if row.identifier < 0:
                row.identifier = row_counter
                row_counter += 1
    # Write dataset to datastore and add new dataset to context
    ds = self.datastore.create_dataset(
        columns=columns,
        rows=rows,
        annotations=dataset.annotations,
        human_readable_name=name.upper(),
        backend_options=backend_options
    )
    self.set_dataset_identifier(name, ds.identifier)
    self.descriptors[ds.identifier] = ds
    return DatasetClient(dataset=self.datastore.get_dataset(ds.identifier))
def test_create_new_dataset(self):
    """Exercise create, update, lookup and rename of a dataset via the
    client, and check the read/write access sets the client maintains.
    """
    def as_text(row):
        # Render every cell of a row as a string for comparison.
        return [str(value) for value in row.values]

    client = VizierDBClient(datastore=self.datastore, datasets=dict())
    source = DatasetClient()
    for column_name in ('Name', 'Age'):
        source.insert_column(column_name)
    for record in (['Alice', '23'], ['Bob', '25']):
        source.insert_row(record)
    original_rows = source.rows
    created = client.create_dataset('MyDataset', source)
    # The dataset handed back by the client mirrors the input data
    self.assertEqual([c.name for c in created.columns], ['Name', 'Age'])
    self.assertEqual(as_text(created.rows[0]), ['Alice', '23'])
    self.assertEqual(as_text(created.rows[1]), ['Bob', '25'])
    # Changing the rows of the input dataset afterwards must not leak into
    # the dataset that was returned by the client
    self.assertEqual(as_text(original_rows[0]), ['Alice', '23'])
    original_rows[0].set_value(0, 'Jane')
    self.assertEqual(as_text(original_rows[0]), ['Jane', '23'])
    self.assertEqual(as_text(created.rows[0]), ['Alice', '23'])
    # Update the dataset and read it back
    created.rows[1].set_value('Age', '26')
    client.update_dataset('MyDataset', created)
    reloaded = client.get_dataset('MyDataset')
    self.assertEqual(as_text(reloaded.rows[1]), ['Bob', '26'])
    # Creating a dataset under an existing name is rejected
    with self.assertRaises(ValueError):
        client.create_dataset('MyDataset', reloaded)
    # Retrieving an unknown dataset is rejected
    with self.assertRaises(ValueError):
        client.get_dataset('SomeDataset')
    # After the rename the dataset is reachable under its new name
    client.rename_dataset('MyDataset', 'SomeDataset')
    client.get_dataset('SomeDataset')
    # Access to unknown datasets is recorded as a read, never as a write
    with self.assertRaises(ValueError):
        client.get_dataset('ThisIsNotADataset')
    for tracked in ('somedataset', 'mydataset'):
        self.assertTrue(tracked in client.read)
        self.assertTrue(tracked in client.write)
    self.assertTrue('thisisnotadataset' in client.read)
    self.assertFalse('thisisnotadataset' in client.write)
def create_dataset(self, name, dataset, backend_options=None):
    """Create a new dataset with given name.

    Columns with a negative identifier are assigned fresh identifiers (in
    place, on the given column objects) that continue after the largest
    column identifier present in the given dataset. Rows are passed to the
    datastore unchanged. On success the lower-cased name is registered in
    the client's dataset mapping and recorded as written.

    Raises ValueError if a dataset with given name already exist or if the
    name is not valid.

    Parameters
    ----------
    name : string
        Unique dataset name
    dataset : vizier.datastore.client.DatasetClient
        Dataset object
    backend_options : list, optional
        Passed through to the datastore's create_dataset call. Defaults to
        a new empty list for every call.

    Returns
    -------
    vizier.datastore.client.DatasetClient
    """
    # Use a fresh list per call instead of a shared mutable default.
    if backend_options is None:
        backend_options = []
    key = name.lower()
    # Raise an exception if a dataset with the given name already exists or
    # if the name is not valid
    if key in self.datasets:
        raise ValueError('dataset \'' + name + '\' already exists')
    if not is_valid_name(name):
        raise ValueError('invalid dataset name \'' + name + '\'')
    # Create list of columns for new dataset. Ensure that every column has
    # a non-negative identifier
    columns = []
    source_columns = dataset.columns
    if len(source_columns) > 0:
        column_counter = max(
            max(col.identifier for col in source_columns) + 1,
            0
        )
        for col in source_columns:
            if col.identifier < 0:
                col.identifier = column_counter
                column_counter += 1
            columns.append(
                DatasetColumn(
                    identifier=col.identifier,
                    name=col.name,
                    data_type=col.data_type
                )
            )
    rows = dataset.rows
    # Write dataset to datastore and add new dataset to context
    ds = self.datastore.create_dataset(
        columns=columns,
        rows=rows,
        properties=dataset.properties,
        human_readable_name=name,
        backend_options=backend_options
    )
    self.datasets[key] = ds
    self.write.add(key)
    return DatasetClient(
        dataset=self.datastore.get_dataset(ds.identifier),
        client=self,
        existing_name=key
    )