示例#1
0
    def get_dataset(self, name):
        """Return a client for the dataset that is registered under a name.

        The read access is recorded under the lower-cased name before the
        dataset is looked up, so that failed lookups are recorded as well.
        Names that are already in the write set are not added to the read set.

        Parameters
        ----------
        name : string
            Unique dataset name

        Returns
        -------
        vizier.datastore.client.DatasetClient

        Raises
        ------
        ValueError
            If the datastore has no dataset for the resolved identifier.
        """
        key = name.lower()
        # Reads of datasets that were written are deliberately not tracked.
        if key not in self.write:
            self.read.add(key)
        # Resolve the name to a dataset identifier. The lookup raises an
        # exception if the name is unknown.
        dataset_id = self.get_dataset_identifier(name)
        handle = self.datastore.get_dataset(dataset_id)
        if handle is not None:
            return DatasetClient(dataset=handle,
                                 client=self,
                                 existing_name=key)
        raise ValueError("unknown dataset '" + dataset_id + "'")
示例#2
0
    def update_dataset(self, name, dataset):
        """Update a given dataset.

        A new dataset is created in the datastore from the columns, rows and
        annotations of the given dataset, and the name is re-pointed to the
        identifier of the new dataset. Columns with a negative identifier are
        assigned fresh identifiers in place, continuing after the maximum
        column identifier of the dataset that is currently stored under the
        name. Row identifiers are not modified here.

        Parameters
        ----------
        name : string
            Unique dataset name
        dataset : vizier.datastore.base.Dataset
            Dataset object

        Returns
        -------
        vizier.datastore.client.DatasetClient

        Raises
        ------
        ValueError
            If the specified dataset does not exist.
        RuntimeError
            If the set of recorded read names contains a non-string entry.
        """
        # Get identifier for the dataset with the given name. Will raise an
        # exception if the name is unknown
        identifier = self.get_dataset_identifier(name)
        # Read dataset from datastore to get the column counter.
        source_dataset = self.datastore.get_dataset(identifier)
        if source_dataset is None:
            # Record access to the datasets
            self.read.add(name.lower())
            raise ValueError('unknown dataset \'' + identifier + '\'')
        columns = dataset.columns
        rows = dataset.rows
        # Ensure that all columns have a non-negative identifier
        column_counter = source_dataset.max_column_id() + 1
        for col in columns:
            if col.identifier < 0:
                col.identifier = column_counter
                column_counter += 1
        # Gather the table names of all datasets that were read so that they
        # can be passed to the datastore as coarse-grained provenance.
        # TODO: we are assuming mimir dataset and datastore
        #       here and need to generalize this
        read_dep = []
        for dept_name in self.read:
            if not isinstance(dept_name, str):
                raise RuntimeError('invalid read name')
            # The read set may contain names that cannot be resolved (e.g.
            # recorded reads of datasets that do not exist). Such entries have
            # no table to depend on and are skipped instead of failing the
            # update.
            # NOTE(review): assumes the lookup signals unknown names with a
            # ValueError; any other exception type still propagates.
            try:
                dept_id = self.get_dataset_identifier(dept_name)
            except ValueError:
                continue
            dept_dataset = self.datastore.get_dataset(dept_id)
            if dept_dataset is not None:
                read_dep.append(dept_dataset.table_name)
        # Write dataset to datastore and add new dataset to context
        ds = self.datastore.create_dataset(columns=columns,
                                           rows=rows,
                                           annotations=dataset.annotations,
                                           dependencies=read_dep)
        self.set_dataset_identifier(name, ds.identifier)
        self.descriptors[ds.identifier] = ds
        return DatasetClient(dataset=self.datastore.get_dataset(ds.identifier))
示例#3
0
    def new_dataset(self) -> DatasetClient:
        """Create a dataset client for a new dataset.

        The client object is constructed with this instance as its ``client``
        argument and without an underlying dataset.

        Returns
        -------
        vizier.datastore.client.DatasetClient
        """
        fresh_client = DatasetClient(client=self)
        return fresh_client
示例#4
0
    def create_dataset(self, name, dataset, backend_options=None):
        """Create a new dataset with given name.

        Columns and rows of the given dataset that have a negative identifier
        are assigned fresh identifiers in place before the dataset is written
        to the datastore. The new identifiers continue after the largest
        identifier that is already present (starting at 0 if all are
        negative).

        Parameters
        ----------
        name : string
            Unique dataset name
        dataset : vizier.datastore.client.DatasetClient
            Dataset object
        backend_options : list, optional
            Options that are passed through to the datastore. An empty list is
            used if the argument is omitted or None.

        Returns
        -------
        vizier.datastore.client.DatasetClient

        Raises
        ------
        ValueError
            If a dataset with the given name already exists or if the name is
            not valid.
        """
        # Use a fresh list per call instead of a shared mutable default.
        if backend_options is None:
            backend_options = []
        # Raise an exception if a dataset with the given name already exists or
        # if the name is not valid
        if self.has_dataset_identifier(name):
            # Record access to the datasets
            self.read.add(name.lower())
            raise ValueError('dataset \'' + name + '\' already exists')
        if not is_valid_name(name):
            raise ValueError('invalid dataset name \'' + name + '\'')
        # Create list of columns for new dataset. Ensure that every column has
        # a non-negative identifier
        columns = []
        if len(dataset.columns) > 0:
            column_counter = max(
                max(col.identifier for col in dataset.columns) + 1, 0)
            for col in dataset.columns:
                if col.identifier < 0:
                    col.identifier = column_counter
                    column_counter += 1
                columns.append(
                    DatasetColumn(identifier=col.identifier,
                                  name=col.name,
                                  data_type=col.data_type))
        rows = dataset.rows
        if len(rows) > 0:
            # Ensure that all rows have a non-negative identifier
            row_counter = max(max(row.identifier for row in rows) + 1, 0)
            for row in rows:
                if row.identifier < 0:
                    row.identifier = row_counter
                    row_counter += 1
        # Write dataset to datastore and add new dataset to context
        ds = self.datastore.create_dataset(columns=columns,
                                           rows=rows,
                                           annotations=dataset.annotations,
                                           human_readable_name=name.upper(),
                                           backend_options=backend_options)
        self.set_dataset_identifier(name, ds.identifier)
        self.descriptors[ds.identifier] = ds
        return DatasetClient(dataset=self.datastore.get_dataset(ds.identifier))
 def test_create_new_dataset(self):
     """Test creating, updating, renaming and reading a dataset via the
     client.

     Also verifies that ValueError is raised for duplicate and unknown
     dataset names, and that the client's read and write sets contain the
     expected lower-cased dataset names afterwards.
     """
     client = VizierDBClient(datastore=self.datastore, datasets=dict())
     # Build a local dataset with two columns and two rows
     ds = DatasetClient()
     ds.insert_column('Name')
     ds.insert_column('Age')
     ds.insert_row(['Alice', '23'])
     ds.insert_row(['Bob', '25'])
     # Keep a reference to the rows of the local dataset before it is stored
     rows = ds.rows
     ds = client.create_dataset('MyDataset', ds)
     # Ensure the returned dataset contains the input data
     self.assertEqual([c.name for c in ds.columns], ['Name', 'Age'])
     self.assertEqual([str(v) for v in ds.rows[0].values], ['Alice', '23'])
     self.assertEqual([str(v) for v in ds.rows[1].values], ['Bob', '25'])
     # Modify the original rows through the retained reference to ensure
     # that the rows in the dataset returned by create_dataset are not
     # affected
     self.assertEqual([str(v) for v in rows[0].values], ['Alice', '23'])
     rows[0].set_value(0, 'Jane')
     self.assertEqual([str(v) for v in rows[0].values], ['Jane', '23'])
     self.assertEqual([str(v) for v in ds.rows[0].values], ['Alice', '23'])
     # Update the dataset and read it back to check the modified value
     ds.rows[1].set_value('Age', '26')
     client.update_dataset('MyDataset', ds)
     ds = client.get_dataset('MyDataset')
     self.assertEqual([str(v) for v in ds.rows[1].values], ['Bob', '26'])
     # Value error when creating dataset with existing name
     with self.assertRaises(ValueError):
         client.create_dataset('MyDataset', ds)
     # Value error when retrieving unknown dataset
     with self.assertRaises(ValueError):
         client.get_dataset('SomeDataset')
     # Rename the dataset and retrieve it under its new name
     client.rename_dataset('MyDataset', 'SomeDataset')
     ds = client.get_dataset('SomeDataset')
     # Ensure that access to unknown datasets is recorded
     with self.assertRaises(ValueError):
         client.get_dataset('ThisIsNotADataset')
     # Both the old and the new dataset name are expected in the read set
     # and in the write set
     for name in ['somedataset', 'mydataset']:
         self.assertTrue(name in client.read)
         self.assertTrue(name in client.write)
     # The unknown dataset name is expected in the read set only
     self.assertTrue('thisisnotadataset' in client.read)
     self.assertFalse('thisisnotadataset' in client.write)
示例#6
0
    def create_dataset(self, name, dataset, backend_options=None):
        """Create a new dataset with given name.

        Columns of the given dataset that have a negative identifier are
        assigned fresh identifiers in place before the dataset is written to
        the datastore. The new dataset is registered under the lower-cased
        name and the name is added to the write set.

        Parameters
        ----------
        name : string
            Unique dataset name
        dataset : vizier.datastore.client.DatasetClient
            Dataset object
        backend_options : list, optional
            Options that are passed through to the datastore. An empty list is
            used if the argument is omitted or None.

        Returns
        -------
        vizier.datastore.client.DatasetClient

        Raises
        ------
        ValueError
            If a dataset with the given name already exists or if the name is
            not valid.
        """
        # Use a fresh list per call instead of a shared mutable default.
        if backend_options is None:
            backend_options = []
        # Dataset names are registered case-insensitively.
        key = name.lower()
        # Raise an exception if a dataset with the given name already exists or
        # if the name is not valid
        if key in self.datasets:
            raise ValueError('dataset \'' + name + '\' already exists')
        if not is_valid_name(name):
            raise ValueError('invalid dataset name \'' + name + '\'')
        # Create list of columns for new dataset. Ensure that every column has
        # a non-negative identifier
        columns = []
        if len(dataset.columns) > 0:
            column_counter = max(
                max(col.identifier for col in dataset.columns) + 1, 0)
            for col in dataset.columns:
                if col.identifier < 0:
                    col.identifier = column_counter
                    column_counter += 1
                columns.append(
                    DatasetColumn(identifier=col.identifier,
                                  name=col.name,
                                  data_type=col.data_type))
        rows = dataset.rows
        # Write dataset to datastore and add new dataset to context
        ds = self.datastore.create_dataset(columns=columns,
                                           rows=rows,
                                           properties=dataset.properties,
                                           human_readable_name=name,
                                           backend_options=backend_options)
        self.datasets[key] = ds
        self.write.add(key)
        return DatasetClient(dataset=self.datastore.get_dataset(ds.identifier),
                             client=self,
                             existing_name=key)