Exemplo n.º 1
0
 def test_recache3(self):
     # Wrap an already-existing numpy array in a DataSet.
     dataset = DataSet(input_array=self.array1)
     numpy.testing.assert_array_almost_equal(self.array1, dataset.array)
     # Recaching an array-backed data set must leave the contents intact.
     dataset.recache()
     numpy.testing.assert_array_almost_equal(self.array1, dataset.array)
Exemplo n.º 2
0
 def test_recache1(self):
     # Obtain a handle to the numeric DSV file.
     handle = DSV.getHandle(self.num_dsv_path)
     # Dialect and delimiter are sniffed automatically for a default DSV.
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.loadAll()
     # With no rows/cols given, the data set spans the whole table.
     dataset = DataSet(dbtable=table)
     table.close()
     numpy.testing.assert_array_almost_equal(self.array1, dataset.array)
     # Recaching from an unchanged table must reproduce the same array.
     dataset.recache()
     numpy.testing.assert_array_almost_equal(self.array1, dataset.array)
Exemplo n.º 3
0
 def test_init3(self):
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.close()
     # Requesting a column that does not exist in the table must raise Error.
     with self.assertRaises(Error):
         DataSet(dbtable=table, cols='BBB')
Exemplo n.º 4
0
 def test_init4(self):
     handle = DSV.getHandle(self.num_dsv_path)
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.close()
     # Row/column specifications of the "none" fixtures must be rejected.
     with self.assertRaises(Error):
         DataSet(dbtable=table,
                 rows=self.sample_rows_none,
                 cols=self.sample_cols_none)
Exemplo n.º 5
0
 def test_init7(self):
     handle = DSV.getHandle(self.num_dsv_path)
     # Dialect and delimiter are sniffed automatically for a default DSV.
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.loadAll()
     table.close()
     # Restrict the data set to the first row only.
     dataset = DataSet(dbtable=table, rows=self.sample_rows_1)
     numpy.testing.assert_array_almost_equal(self.array_v1, dataset.array)
Exemplo n.º 6
0
 def test_init5(self):
     handle = DSV.getHandle(self.num_dsv_path)
     # Dialect and delimiter are sniffed automatically for a default DSV.
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     # The table is created but never loaded, so the backing data is empty.
     table.create()
     table.close()
     # numpy < 1.6.0 raises IOError from loadtxt() on an empty source;
     # 1.6.0 and later merely emit a warning.
     if check_min_numpy_version(1, 6, 0):
         # Silence the empty-source warning; expect an empty array.
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             dataset = DataSet(dbtable=table)
         numpy.testing.assert_equal(self.empty_array, dataset.array)
     else:
         with self.assertRaises(Error):
             DataSet(dbtable=table)
Exemplo n.º 7
0
 def test_recache2(self):
     handle = DSV.getHandle(self.num_dsv_path)
     # Dialect and delimiter are sniffed automatically for a default DSV.
     table = DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname)
     table.create()
     table.loadAll()
     # With no rows/cols given, the data set spans the whole table.
     dataset = DataSet(dbtable=table)
     table.close()
     numpy.testing.assert_array_almost_equal(self.array1, dataset.array)
     # Empty the underlying table so the next recache sees no data.
     cursor = table.db.cursor()
     cursor.execute('delete from "%s";' % self.test_dtname)
     table.db.commit()
     cursor.execute('vacuum;')
     table.db.commit()
     # numpy < 1.6.0 raises IOError from loadtxt() on an empty source;
     # 1.6.0 and later merely emit a warning.
     if check_min_numpy_version(1, 6, 0):
         # Recache now yields an empty array; silence the numpy warning.
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             dataset.recache()
         numpy.testing.assert_array_almost_equal(self.empty_array, dataset.array)
     else:
         with self.assertRaises(Error):
             dataset.recache()
Exemplo n.º 8
0
    def getSubset(self,
                  pkcID,
                  forSamples='*',
                  get_ssinfo=True,
                  get_dataset=True):
        r"""
Generate data subset for specific prior knowledge concept, and wrap it into
:class:`~kdvs.fw.DataSet.DataSet` instance if requested. Optionally, it can also
generate only the information needed to create subset manually and not the subset
itself; this may be useful e.g. if data come from remote source that offers no
complete control over querying.

Parameters
----------
pkcID : string
    identifier of prior knowledge concept for which the data subset will be generated

forSamples : iterable/string
    samples that will be used to generate data subset; by default, prior knowledge
    is associated with individual measurements and treats samples as equal; this
    may be changed by specifying the individual samples to focus on (as tuple of
    strings) or specifying string '*' for considering all samples; '*' by default

get_ssinfo : boolean
    if True, generate runtime information about the data subset and return it;
    True by default

get_dataset : boolean
    if True, generate an instance of :class:`~kdvs.fw.DataSet.DataSet` that wraps
    the data subset and return it; True by default

Returns
-------
ssinfo : dict/None
    runtime information as a dictionary of the following elements

        * 'dtable' -- :class:`~kdvs.fw.DBTable.DBTable` instance of the primary input data set
        * 'rows' -- row IDs for the subset (typically, measurement IDs)
        * 'cols' -- column IDs for the subset (typically, sample names)
        * 'pkcID' -- prior knowledge concept ID used to generate the subset; can be None if 'get_ssinfo' parameter was False

subset_ds : :class:`~kdvs.fw.DataSet.DataSet`/None
    DataSet instance that holds the numerical information of the subset; can be
    None if 'get_dataset' parameter was False

Raises
------
Error
    if `forSamples` parameter value was incorrectly specified
        """
        if forSamples == '*':
            subset_cols = self.all_samples
        elif isListOrTuple(forSamples):
            subset_cols = list(forSamples)
        else:
            raise Error('Non-empty list, tuple, or "*" expected! (got %s)' %
                        (forSamples.__class__))
        # TODO: variables ID sorting introduced for compatibility with V1.0
        subset_vars = sorted(list(self.pkcidmap.pkc2emid[pkcID]))
        if get_ssinfo:
            ssinfo = dict()
            ssinfo['dtable'] = self.dtable
            ssinfo['rows'] = subset_vars
            ssinfo['cols'] = subset_cols
            ssinfo['pkcID'] = pkcID
        else:
            ssinfo = None
        if get_dataset:
            subset_ds = DataSet(dbtable=self.dtable,
                                cols=subset_cols,
                                rows=subset_vars,
                                remove_id_col=False)
        else:
            subset_ds = None
        return ssinfo, subset_ds
Exemplo n.º 9
0
 def test_rowscols1(self):
     # Wrap an already-existing numpy array in a DataSet.
     dataset = DataSet(input_array=self.array1)
     # The default row and column identifiers must match the fixtures.
     self.assertSequenceEqual(self.array1_rows, dataset.rows)
     self.assertSequenceEqual(self.array1_cols, dataset.cols)
Exemplo n.º 10
0
 def test_init9(self):
     # A non-array input object must be rejected with Error.
     with self.assertRaises(Error):
         DataSet(input_array='XXXXX')
Exemplo n.º 11
0
 def test_init8(self):
     # Wrapping an existing numpy array must expose identical contents.
     dataset = DataSet(input_array=self.array1)
     numpy.testing.assert_array_almost_equal(self.array1, dataset.array)