Пример #1
0
 def test_IsListOrTuple(self):
     # an empty list and a tuple of strings must both be recognised
     for accepted in ([], ('1', '2')):
         self.assertTrue(isListOrTuple(accepted))
     # a string, a float and None are neither list nor tuple
     for rejected in ('aaa', 1.0, None):
         self.assertFalse(isListOrTuple(rejected))
Пример #2
0
    def __init__(self, call_func, call_args, additional_data={}):
        r"""
Parameters
----------
call_func : function
    function to be executed as computational job

call_args : list/tuple
    positional arguments for the function to be executed as computational job

additional_data : dict
    any additional data that are associated with the job

Raises
------
Error
    if proper job function was not specified
Error
    if proper list/tuple of job arguments was not specified
Error
    if additional data could not be accessed (in sense of dict.update)
        """
        if not isinstance(call_func, types.FunctionType):
            raise Error('Function expected! (got %s)' % call_func.__class__)
        if not isListOrTuple(call_args):
            raise Error('List or tuple expected! (got %s)' %
                        call_args.__class__)
        self.call_func = call_func
        self.call_args = call_args
        self.additional_data = dict()
        try:
            self.additional_data.update(additional_data)
        except Exception, e:
            raise Error('Could not obtain job additional data! (Reason: %s)' %
                        e)
Пример #3
0
    def __init__(self, jobs_to_execute):
        r"""
Parameters
----------
jobs_to_execute : iterable of :class:`~kdvs.fw.Job.Job`
    jobs to be executed

Raises
------
Error
    if iterable is incorrectly specified
        """
        if not isListOrTuple(jobs_to_execute):
            raise Error('List or tuple expected! (got %s)' %
                        jobs_to_execute.__class__)
        self.jobs_to_execute = jobs_to_execute
Пример #4
0
    def __init__(self, in_dict):
        r"""
Parameters
----------
in_dict : dict
    dictionary containing simplified directives; the constructor checks if all
    required elements are present

Raises
------
Error
    if list/tuple of column names are not specified and/or empty
    if input dictionary is missing any of required elements
        """
        self._tmpl = dict()
        if all(k in in_dict for k in dbtemplate_keys):
            cls = in_dict['columns']
            if not isListOrTuple(cls) or len(cls) == 0:
                raise Error('Non-empty list or tuple expected! (got %s)' % cls.__class__)
            self._tmpl.update(in_dict)
        else:
            raise Error('%s must contain all of the following keys: %s !' % (
                           quote(className(self)), quote(' '.join(dbtemplate_keys))))
Пример #5
0
    def __init__(self, dbm, db_key, columns, name=None, id_col=None):
        r"""
Parameters
----------
dbm : :class:`~kdvs.core.db.DBManager`
    an instance of DB manager that is managing this table

db_key : string
    internal ID of the table used by DB manager instance; it is NOT the name of
    physical database table in underlying RDBMS; typically, user of DB manager
    refers to the table by this ID and not by its physical name

columns : list/tuple of strings
    column names for the table

name : string/None
    physical name of database table in underlying RDBMS; if None, the name is
    generated semi--randomly; NOTE: ordinary user of DB manager shall refer to
    the table with 'db_key' ID

id_col : string/None
    designates specific column to be "ID column"; if None, the first column is
    designated as ID column

Raises
------
Error
    if DBManager instance is not present
Error
    if list/tuple with column names is not present
Error
    if ID column name is not the one of existing columns
        """
        # ---- resolve DBManager
        if not isinstance(dbm, DBManager):
            raise Error('%s instance expected! (got %s)' % (DBManager.__class__, dbm.__class__))
        else:
            self.dbm = dbm
        # ---- get target DB
        db = dbm.getDB(db_key)
        self.db_key = db_key
        self.db = db
        # ---- resolve columns
        if isListOrTuple(columns):
            self.columns = tuple(columns)
        else:
            raise Error('List or tuple expected! (got %s)' % columns.__class__)
        # ---- resolve ID column
        if id_col is None:
            self.id_column_idx = 0
            self.id_column = self.columns[0]
        else:
            if id_col in self.columns:
                self.id_column_idx = self.columns.index(id_col)
                self.id_column = id_col
            else:
                raise Error('ID column must be one of the existing columns! (got %s)' % id_col)
        # ---- resolve table name
        if name is None:
            self.name = '%s%s' % (self.__class__.__name__, uuid.uuid4().hex)
        else:
            self.name = name
Пример #6
0
class DBTable(object):
    r"""
Low--level wrapper over database table managed by KDVS DB manager. KDVS uses
database tables to manage query--intensive information, such as the robust
generation of data subsets from single main input data set. The wrapper encapsulates
basic functionality incl. table creation, table filling from specific generator
function, querying with conditions over columns and rows (in case where first
column holds row IDs), generation of associated :class:`numpy.ndarray` object
(if possible), as well as basic counting routines.
    """
    def __init__(self, dbm, db_key, columns, name=None, id_col=None):
        r"""
Bind the table wrapper to its DB manager and database, and resolve the column
names, the ID column and the physical table name. Nothing is created in the
database by the constructor; see :meth:`create`.

Parameters
----------
dbm : :class:`~kdvs.core.db.DBManager`
    an instance of DB manager that is managing this table

db_key : string
    internal ID of the table used by DB manager instance; it is NOT the name of
    physical database table in underlying RDBMS; typically, user of DB manager
    refers to the table by this ID and not by its physical name

columns : list/tuple of strings
    column names for the table; stored internally as tuple

name : string/None
    physical name of database table in underlying RDBMS; if None, the name is
    generated as the class name followed by a random UUID in hexadecimal form;
    NOTE: ordinary user of DB manager shall refer to the table with 'db_key' ID

id_col : string/None
    designates specific column to be "ID column"; if None, the first column is
    designated as ID column

Raises
------
Error
    if DBManager instance is not present
Error
    if list/tuple with column names is not present
Error
    if ID column name is not the one of existing columns
        """
        # ---- resolve DBManager
        if not isinstance(dbm, DBManager):
            # report the expected class itself; DBManager.__class__ is the
            # metaclass and would render as an uninformative "type"
            raise Error('%s instance expected! (got %s)' % (DBManager, dbm.__class__))
        self.dbm = dbm
        # ---- get target DB
        self.db_key = db_key
        self.db = dbm.getDB(db_key)
        # ---- resolve columns
        if not isListOrTuple(columns):
            raise Error('List or tuple expected! (got %s)' % columns.__class__)
        self.columns = tuple(columns)
        # ---- resolve ID column
        if id_col is None:
            # default: the first column acts as ID column
            self.id_column_idx = 0
            self.id_column = self.columns[0]
        elif id_col in self.columns:
            self.id_column_idx = self.columns.index(id_col)
            self.id_column = id_col
        else:
            raise Error('ID column must be one of the existing columns! (got %s)' % id_col)
        # ---- resolve table name
        if name is None:
            self.name = '%s%s' % (self.__class__.__name__, uuid.uuid4().hex)
        else:
            self.name = name

    def create(self, indexed_columns='*', debug=False):
        r"""
Physically create the table in underlying RDBMS; the creation is deferred until
this call. The table is created empty. Every column is declared with the text
column type reported by the DB provider.

Parameters
----------
indexed_columns : list/tuple/'*'
    list/tuple of column names to be indexed by underlying RDBMS; if string '*'
    is specified, all columns will be indexed; '*' by default

debug : boolean
    provides debug mode for table creation; if True, the SQL statements are
    only collected and returned as list of strings, and nothing is executed;
    if False, the statements are executed and None is returned; False by default

Returns
-------
statements : list of strings/None
    SQL statements built for table creation and indexing, if debug mode is
    requested; or None otherwise

Raises
------
Error
    if 'indexed_columns' is neither list/tuple nor '*'
Error
    if table creation or indexing was interrupted with an error; essentially,
    reraise OperationalError from underlying RDBMS
        """
        statements = []
        # ---- create table
        cs = self.db.cursor()
        dberror = self.dbm.provider.getOperationalError()
        ctype = self.dbm.provider.getTextColumnType()
        # make columns
        cols = ','.join(['%s %s' % (quote(c), ctype) for c in self.columns])
        # make statement
        st = 'create table %s (%s)' % (quote(self.name), cols)
        if debug:
            statements.append(st)
        else:
            try:
                cs.execute(st)
            except dberror as e:
                raise Error('Cannot create table %s in database %s! (Reason: %s)' % (quote(self.name), quote(self.db_key), e))
        # ---- create indexes
        # resolve indexed columns
        if indexed_columns == '*':
            indexed = tuple(self.columns)
        elif isListOrTuple(indexed_columns):
            indexed = tuple(indexed_columns)
        else:
            raise Error('List or tuple expected! (got %s)' % indexed_columns.__class__)
        # make indexes; one index per column, named <table>__<column>
        for ic in indexed:
            idx_name = '%s__%s' % (self.name, ic)
            idx_st = 'create index %s on %s(%s)' % (quote(idx_name), quote(self.name), quote(ic))
            if debug:
                statements.append(idx_st)
            else:
                try:
                    cs.execute(idx_st)
                except dberror as e:
                    raise Error('Cannot create index on column %s for table %s in database %s! (Reason: %s)' % (quote(ic), quote(self.name), quote(self.db_key), e))
        # hand the collected statements back in debug mode, as documented
        if debug:
            return statements
        return None
Пример #7
0
    def get(self, columns='*', rows='*', filter_clause=None, debug=False):
        r"""
Perform query from the table under specified conditions and return corresponding
Cursor instance; the Cursor may be used immediately in straightforward manner or
may be wrapped in :class:`~kdvs.fw.DBResult.DBResult` instance.

Parameters
----------
columns : list/tuple/'*'
    list of column names that the quering will be performed from; if string '*'
    is specified instead, all columns will be queried; '*' by default

rows: list/tuple/'*'
    list of rows (i.e. list of values from designated ID column) that the
    quering will be performed for; if string '*' is specified instead, all rows
    (i.e. whole content of ID column) will be queried; '*' by default

filter_clause : string/None
    additional filtering conditions stated in the form of correct SQL WHERE
    clause suitable for underlying RDBMS; if None, no additional filtering is
    added; None by default

debug : boolean
    provides debug mode for table querying; if True, collect all SQL statements
    produced by underlying RDBMS and return them as list of strings; if False,
    return None; False by default; NOTE: for this method, debug mode DOES NOT
    perform any physical querying, it just produces underlyng SQL statements
    and returns them

Returns
-------
cs/statements : Cursor/list of strings
    if debug mode was not requested: proper Cursor instance that may be used
    immediately or wrapped into DBResult object; if debug mode was requested:
    RDBMS SQL statements issued during table querying

Raises
------
Error
    if list/tuple of columns/rows was specified incorrectly
    if specified list of columns/rows is empty
    if table querying was interrupted with an error; essentially, reraise
    OperationalError from underlying RDBMS

See Also
--------
:pep:`249`
        """
        statements = []
        cs = self.db.cursor()
        dberror = self.dbm.provider.getOperationalError()
        # ---- resolve columns
        if columns == '*':
            cols_st = columns
        else:
            if isListOrTuple(columns):
                if len(columns) == 0:
                    raise Error('Non-empty list of columns expected!')
            else:
                raise Error('List or tuple expected! (got %s)' % columns.__class__)
            cols_st = ','.join([quote(c) for c in columns])
        # ---- resolve rows
        if rows != '*':
            if isListOrTuple(rows):
                if len(rows) > 0:
                    rs = tuple(rows)
                else:
                    raise Error('Non-empty list of rows expected!')
            else:
                raise Error('List or tuple expected! (got %s)' % rows.__class__)
            rows_st = ','.join([quote(r) for r in rs])
        else:
            rows_st = rows
        # ---- make statement
        if rows_st == '*':
            # resolve filter clause
            if filter_clause is not None:
                flt_cl = ' where %s' % filter_clause
            else:
                flt_cl = ''
            get_st = 'select %s from %s%s' % (cols_st, quote(self.name), flt_cl)
        else:
            # resolve filter clause
            if filter_clause is not None:
                flt_cl = ' and %s' % filter_clause
            else:
                flt_cl = ''
            get_st = 'select %s from %s where %s in (%s)%s' % (cols_st, quote(self.name), quote(self.id_column), rows_st, flt_cl)
        # ---- get content
        if debug:
            statements.append(get_st)
        else:
            try:
                cs.execute(get_st)
            except dberror, e:
                raise Error('Cannot select from table %s in database %s! (Reason: %s) (Cols: %s) (Rows: %s)' % (
                                quote(self.name), quote(self.db_key), e, columns, rows))
Пример #8
0
    def getSubset(self,
                  pkcID,
                  forSamples='*',
                  get_ssinfo=True,
                  get_dataset=True):
        r"""
Generate data subset for specific prior knowledge concept, and wrap it into
:class:`~kdvs.fw.DataSet.DataSet` instance if requested. Optionally, it can also
generate only the information needed to create subset manually and not the subset
itself; this may be useful e.g. if data come from remote source that offers no
complete control over querying.

Parameters
----------
pkcID : string
    identifier of prior knowledge concept for which the data subset will be generated

forSamples : iterable/string
    samples that will be used to generate data subset; by default, prior knowledge
    is associated with individual measurements and treats samples as equal; this
    may be changed by specifying the individual samples to focus on (as tuple of
    strings) or specifying string '*' for considering all samples; '*' by default

get_ssinfo : boolean
    if True, generate runtime information about the data subset and return it;
    True by default

get_dataset : boolean
    if True, generate an instance of :class:`~kdvs.fw.DataSet.DataSet` that wraps
    the data subset and return it; True by default

Returns
-------
ssinfo : dict/None
    runtime information as a dictionary of the following elements

        * 'dtable' -- :class:`~kdvs.fw.DBTable.DBTable` instance of the primary input data set
        * 'rows' -- row IDs for the subset (typically, measurement IDs)
        * 'cols' -- column IDs for the subset (typically, sample names)
        * 'pkcID' -- prior knowledge concept ID used to generate the subset; can be None if 'get_ssinfo' parameter was False

subset_ds : :class:`~kdvs.fw.DataSet.DataSet`/None
    DataSet instance that holds the numerical information of the subset; can be
    None if 'get_dataset' parameter was False

Raises
------
Error
    if `forSamples` parameter value was incorrectly specified
        """
        if forSamples == '*':
            subset_cols = self.all_samples
        elif isListOrTuple(forSamples):
            subset_cols = list(forSamples)
        else:
            raise Error('Non-empty list, tuple, or "*" expected! (got %s)' %
                        (forSamples.__class__))
        # TODO: variables ID sorting introduced for compatibility with V1.0
        subset_vars = sorted(list(self.pkcidmap.pkc2emid[pkcID]))
        if get_ssinfo:
            ssinfo = dict()
            ssinfo['dtable'] = self.dtable
            ssinfo['rows'] = subset_vars
            ssinfo['cols'] = subset_cols
            ssinfo['pkcID'] = pkcID
        else:
            ssinfo = None
        if get_dataset:
            subset_ds = DataSet(dbtable=self.dtable,
                                cols=subset_cols,
                                rows=subset_vars,
                                remove_id_col=False)
        else:
            subset_ds = None
        return ssinfo, subset_ds
Пример #9
0
    def __init__(self,
                 dbm,
                 db_key,
                 filehandle,
                 dtname=None,
                 delimiter=None,
                 comment=None,
                 header=None,
                 make_missing_ID_column=True):
        r"""
Parameters
----------
dbm : :class:`~kdvs.core.db.DBManager`
    an instance of DB manager that is managing this table

db_key : string
    internal ID of the table used by DB manager instance; it is NOT the name of
    physical database table in underlying RDBMS; typically, user of DB manager
    refers to the table by this ID and not by its physical name

filehandle : file--like
    file handle to associated DSV file that contains the data that DSV table
    will hold; the file remains open but the data loading is deferred until
    requested

dtname : string/None
    physical name of database table in underlying RDBMS; if None, the name is
    generated semi--randomly; NOTE: ordinary user of DB manager shall refer to
    the table with 'db_key' ID

delimiter : string/None
    delimiter string of length 1 that should be used for parsing of DSV data;
    if None, the constructor tries to deduce delimiter by looking into first 10
    lines of associated DSV file; None by default; NOTE: giving explicit delimiter
    instead of deducing it dynamically greatly reduces possibility of errors
    during parsing DSV data

comment : string/None
    comment prefix used in associated DSV file, or None if comments are not used;
    None by default

header : list/tuple of string / None
    if header is present in the form of list/tuple of strings, it will be used
    as list of columns for the underlying database table; if None, the constructor
    tries to deduce the correct header by looking into first two lines of
    associated DSV file; None by default; NOTE: for well formed DSV files, header
    should be present, so it is relatively safe to deduce it automatically

make_missing_ID_column : boolean
    used in connection with previous argument; sometimes one can encounter DSV
    files that contain NO first column name in the header (e.g. generated from
    various R functions), and while they contain correct data, such files are
    syntactically incorrect; if the constructor sees lack of the first column name,
    it can proceed according to this parameter; if True, it inserts the content
    of :data:`DSV_DEFAULT_ID_COLUMN` variable as the missing column name; if False, it
    inserts empty string "" as the missing column name; True by default

Raises
------
Error
    if proper comment string was not specified
Error
    if underlying DSV dialect of associated DSV file has not been resolved correctly
Error
    if delimiter has not been specified correctly
Error
    if header iterable has not been specified correctly
Error
    if parsing of DSV data during deducing was interrupted with an error; essentially,
    it reraises underlying csv.Error

See Also
--------
csv
        """
        # ---- resolve comment
        if comment is not None and not isinstance(comment, basestring):
            raise Error('String or None expected! (got %s)' % comment)
        else:
            self.comment = comment
        # ---- resolve delimiter and dialect
        if delimiter is None:
            self._resolve_dialect(filehandle)
        else:
            if isinstance(delimiter, basestring) and len(delimiter) == 1:
                self.delimiter = delimiter
                try:
                    self.dialect = _dialects[delimiter]
                except KeyError:
                    raise Error(
                        'Dialect not identified for delimiter %s! (Sniffing required?)'
                        % quote(delimiter))
            else:
                raise Error('Single character expected! (got %s)' %
                            (delimiter))
        # ---- resolve header
        if header is None:
            self._extract_header(filehandle, make_missing_ID_column)
        else:
            if isListOrTuple(header):
                if len(header) == 0:
                    self._autogenerate_header(filehandle,
                                              make_missing_ID_column)
                else:
                    self._verify_header(filehandle, header)
                    self.header = header
            else:
                raise Error('List or tuple expected! (got %s)' %
                            header.__class__)
        # ---- DSV analysis finished, initialize underlying instance
        self.handle = filehandle
        super(DSV, self).__init__(dbm, db_key, self.header, dtname)