def test_IsListOrTuple(self):
    # list and tuple instances must be recognised ...
    accepted = ([], ('1', '2'))
    # ... while a string, a float and None must be rejected
    rejected = ('aaa', 1.0, None)
    for candidate in accepted:
        self.assertTrue(isListOrTuple(candidate))
    for candidate in rejected:
        self.assertFalse(isListOrTuple(candidate))
def __init__(self, call_func, call_args, additional_data=None):
    r"""
    Validate and store the function, its positional arguments and any
    additional data that together describe a single computational job.

    Parameters
    ----------
    call_func : function
        function to be executed as computational job; must be an instance of
        :class:`types.FunctionType`
    call_args : list/tuple
        positional arguments for the function to be executed as computational job
    additional_data : dict/None
        any additional data that are associated with the job; the content is
        shallow-copied into a fresh dictionary; None (the default) means that
        there are no additional data

    Raises
    ------
    Error
        if proper job function was not specified
    Error
        if proper list/tuple of job arguments was not specified
    Error
        if additional data could not be accessed (in sense of dict.update)
    """
    if not isinstance(call_func, types.FunctionType):
        raise Error('Function expected! (got %s)' % call_func.__class__)
    if not isListOrTuple(call_args):
        raise Error('List or tuple expected! (got %s)' % call_args.__class__)
    self.call_func = call_func
    self.call_args = call_args
    # always start from a fresh dictionary so the job never aliases the
    # mapping supplied by the caller
    self.additional_data = dict()
    if additional_data is not None:
        try:
            self.additional_data.update(additional_data)
        except Exception as e:
            raise Error('Could not obtain job additional data! (Reason: %s)' % e)
def __init__(self, jobs_to_execute):
    r"""
    Store the collection of jobs after checking its container type.

    Parameters
    ----------
    jobs_to_execute : iterable of :class:`~kdvs.fw.Job.Job`
        jobs to be executed; must be given as list or tuple

    Raises
    ------
    Error
        if iterable is incorrectly specified
    """
    if isListOrTuple(jobs_to_execute):
        self.jobs_to_execute = jobs_to_execute
    else:
        raise Error('List or tuple expected! (got %s)' % (jobs_to_execute.__class__,))
def __init__(self, in_dict):
    r"""
    Build the internal template from a dictionary of simplified directives.

    Parameters
    ----------
    in_dict : dict
        dictionary containing simplified directives; the constructor checks
        if all required elements are present

    Raises
    ------
    Error
        if list/tuple of column names is not specified and/or is empty
    Error
        if input dictionary is missing any of required elements
    """
    self._tmpl = {}
    # every required key must be present before anything else is inspected
    if any(key not in in_dict for key in dbtemplate_keys):
        raise Error('%s must contain all of the following keys: %s !' % (
            quote(className(self)), quote(' '.join(dbtemplate_keys))))
    column_names = in_dict['columns']
    if not (isListOrTuple(column_names) and len(column_names) != 0):
        raise Error('Non-empty list or tuple expected! (got %s)' % column_names.__class__)
    self._tmpl.update(in_dict)
def __init__(self, dbm, db_key, columns, name=None, id_col=None):
    r"""
    Parameters
    ----------
    dbm : :class:`~kdvs.core.db.DBManager`
        an instance of DB manager that is managing this table
    db_key : string
        internal ID of the table used by DB manager instance; it is NOT the name
        of physical database table in underlying RDBMS; typically, user of DB
        manager refers to the table by this ID and not by its physical name
    columns : list/tuple of strings
        column names for the table; must not be empty
    name : string/None
        physical name of database table in underlying RDBMS; if None, the name is
        generated semi--randomly; NOTE: ordinary user of DB manager shall refer
        to the table with 'db_key' ID
    id_col : string/None
        designates specific column to be "ID column"; if None, the first column
        is designated as ID column

    Raises
    ------
    Error
        if DBManager instance is not present
    Error
        if list/tuple with column names is not present or is empty
    Error
        if ID column name is not the one of existing columns
    """
    # ---- resolve DBManager
    if not isinstance(dbm, DBManager):
        # report the expected class itself; DBManager.__class__ would only
        # name its metaclass
        raise Error('%s instance expected! (got %s)' % (DBManager, dbm.__class__))
    self.dbm = dbm
    # ---- get target DB
    db = dbm.getDB(db_key)
    self.db_key = db_key
    self.db = db
    # ---- resolve columns
    if not isListOrTuple(columns):
        raise Error('List or tuple expected! (got %s)' % columns.__class__)
    if len(columns) == 0:
        # without this check the default ID column lookup below would fail
        # with a bare IndexError
        raise Error('Non-empty list or tuple expected! (got %s)' % columns.__class__)
    self.columns = tuple(columns)
    # ---- resolve ID column
    if id_col is None:
        self.id_column_idx = 0
        self.id_column = self.columns[0]
    elif id_col in self.columns:
        self.id_column_idx = self.columns.index(id_col)
        self.id_column = id_col
    else:
        # one-element tuple keeps the formatting safe when id_col is itself a tuple
        raise Error('ID column must be one of the existing columns! (got %s)' % (id_col,))
    # ---- resolve table name
    if name is None:
        self.name = '%s%s' % (self.__class__.__name__, uuid.uuid4().hex)
    else:
        self.name = name
class DBTable(object):
    r"""
    Low--level wrapper over database table managed by KDVS DB manager. KDVS uses
    database tables to manage query--intensive information, such as the robust
    generation of data subsets from single main input data set. The wrapper
    encapsulates basic functionality incl. table creation, table filling from
    specific generator function, querying with conditions over columns and rows
    (in case where first column holds row IDs), generation of associated
    :class:`numpy.ndarray` object (if possible), as well as basic counting routines.
    """

    def __init__(self, dbm, db_key, columns, name=None, id_col=None):
        r"""
        Parameters
        ----------
        dbm : :class:`~kdvs.core.db.DBManager`
            an instance of DB manager that is managing this table
        db_key : string
            internal ID of the table used by DB manager instance; it is NOT the
            name of physical database table in underlying RDBMS; typically, user
            of DB manager refers to the table by this ID and not by its physical
            name
        columns : list/tuple of strings
            column names for the table; must not be empty
        name : string/None
            physical name of database table in underlying RDBMS; if None, the
            name is generated semi--randomly; NOTE: ordinary user of DB manager
            shall refer to the table with 'db_key' ID
        id_col : string/None
            designates specific column to be "ID column"; if None, the first
            column is designated as ID column

        Raises
        ------
        Error
            if DBManager instance is not present
        Error
            if list/tuple with column names is not present or is empty
        Error
            if ID column name is not the one of existing columns
        """
        # ---- resolve DBManager
        if not isinstance(dbm, DBManager):
            # report the expected class itself; DBManager.__class__ would only
            # name its metaclass
            raise Error('%s instance expected! (got %s)' % (DBManager, dbm.__class__))
        self.dbm = dbm
        # ---- get target DB
        db = dbm.getDB(db_key)
        self.db_key = db_key
        self.db = db
        # ---- resolve columns
        if not isListOrTuple(columns):
            raise Error('List or tuple expected! (got %s)' % columns.__class__)
        if len(columns) == 0:
            # without this check the default ID column lookup below would fail
            # with a bare IndexError
            raise Error('Non-empty list or tuple expected! (got %s)' % columns.__class__)
        self.columns = tuple(columns)
        # ---- resolve ID column
        if id_col is None:
            self.id_column_idx = 0
            self.id_column = self.columns[0]
        elif id_col in self.columns:
            self.id_column_idx = self.columns.index(id_col)
            self.id_column = id_col
        else:
            # one-element tuple keeps the formatting safe when id_col is itself a tuple
            raise Error('ID column must be one of the existing columns! (got %s)' % (id_col,))
        # ---- resolve table name
        if name is None:
            self.name = '%s%s' % (self.__class__.__name__, uuid.uuid4().hex)
        else:
            self.name = name

    def create(self, indexed_columns='*', debug=False):
        r"""
        Physically create the table in underlying RDBMS; the creation is deferred
        until this call. The table is created empty.

        Parameters
        ----------
        indexed_columns : list/tuple/'*'
            list/tuple of column names to be indexed by underlying RDBMS; if
            string '*' is specified, all columns will be indexed; '*' by default
        debug : boolean
            provides debug mode for table creation; if True, the SQL statements
            are only collected (nothing is executed) and returned as list of
            strings; if False, the statements are executed and None is returned;
            False by default

        Returns
        -------
        statements : list of strings/None
            SQL statements prepared for table creation and indexing, if debug
            mode is requested; or None otherwise

        Raises
        ------
        Error
            if table creation or indexing was interrupted with an error;
            essentially, reraise OperationalError from underlying RDBMS
        Error
            if indexed columns were specified neither as '*' nor as list/tuple
        """
        statements = []
        # ---- create table
        cs = self.db.cursor()
        dberror = self.dbm.provider.getOperationalError()
        ctype = self.dbm.provider.getTextColumnType()
        # make columns; every column uses the text type reported by the provider
        cols = ','.join(['%s %s' % (quote(c), ctype) for c in self.columns])
        # make statement
        st = 'create table %s (%s)' % (quote(self.name), cols)
        if debug:
            statements.append(st)
        else:
            try:
                cs.execute(st)
            except dberror as e:
                raise Error('Cannot create table %s in database %s! (Reason: %s)' % (
                    quote(self.name), quote(self.db_key), e))
        # ---- create indexes
        # resolve indexed columns
        if indexed_columns == '*':
            indexed = tuple(self.columns)
        elif isListOrTuple(indexed_columns):
            indexed = tuple(indexed_columns)
        else:
            raise Error('List or tuple expected! (got %s)' % indexed_columns.__class__)
        # make indexes, one per requested column
        for ic in indexed:
            idx_name = '%s__%s' % (self.name, ic)
            idx_st = 'create index %s on %s(%s)' % (
                quote(idx_name), quote(self.name), quote(ic))
            if debug:
                statements.append(idx_st)
            else:
                try:
                    cs.execute(idx_st)
                except dberror as e:
                    raise Error(
                        'Cannot create index on column %s for table %s in database %s! (Reason: %s)' % (
                            quote(ic), quote(self.name), quote(self.db_key), e))
        # ---- hand back collected statements only in debug mode, as documented
        if debug:
            return statements
        return None
def get(self, columns='*', rows='*', filter_clause=None, debug=False):
    r"""
    Perform query from the table under specified conditions and return
    corresponding Cursor instance; the Cursor may be used immediately in
    straightforward manner or may be wrapped in
    :class:`~kdvs.fw.DBResult.DBResult` instance.

    Parameters
    ----------
    columns : list/tuple/'*'
        list of column names that the querying will be performed from; if string
        '*' is specified instead, all columns will be queried; '*' by default
    rows : list/tuple/'*'
        list of rows (i.e. list of values from designated ID column) that the
        querying will be performed for; if string '*' is specified instead, all
        rows (i.e. whole content of ID column) will be queried; '*' by default
    filter_clause : string/None
        additional filtering conditions stated in the form of correct SQL WHERE
        clause suitable for underlying RDBMS; if None, no additional filtering
        is added; None by default
    debug : boolean
        provides debug mode for table querying; if True, the SQL statement is
        only collected and returned inside a list of strings; False by default;
        NOTE: for this method, debug mode DOES NOT perform any physical
        querying, it just produces underlying SQL statements and returns them

    Returns
    -------
    cs/statements : Cursor/list of strings
        if debug mode was not requested: Cursor instance on which the query was
        executed, that may be used immediately or wrapped into DBResult object;
        if debug mode was requested: SQL statements prepared for table querying

    Raises
    ------
    Error
        if list/tuple of columns/rows was specified incorrectly
    Error
        if specified list of columns/rows is empty
    Error
        if table querying was interrupted with an error; essentially, reraise
        OperationalError from underlying RDBMS

    See Also
    --------
    :pep:`249`
    """
    statements = []
    cs = self.db.cursor()
    dberror = self.dbm.provider.getOperationalError()
    # ---- resolve columns
    if columns == '*':
        cols_st = columns
    else:
        if not isListOrTuple(columns):
            raise Error('List or tuple expected! (got %s)' % columns.__class__)
        if len(columns) == 0:
            raise Error('Non-empty list of columns expected!')
        cols_st = ','.join([quote(c) for c in columns])
    # ---- resolve rows
    if rows != '*':
        if not isListOrTuple(rows):
            raise Error('List or tuple expected! (got %s)' % rows.__class__)
        if len(rows) == 0:
            raise Error('Non-empty list of rows expected!')
        rows_st = ','.join([quote(r) for r in tuple(rows)])
    else:
        rows_st = rows
    # ---- make statement
    # NOTE(review): filter_clause is interpolated verbatim into the SQL text,
    # so it must never carry untrusted input
    if rows_st == '*':
        # no row restriction: the filter clause (if any) becomes the WHERE clause
        if filter_clause is not None:
            flt_cl = ' where %s' % filter_clause
        else:
            flt_cl = ''
        get_st = 'select %s from %s%s' % (cols_st, quote(self.name), flt_cl)
    else:
        # rows are restricted through the ID column: the filter clause (if any)
        # is appended as an additional AND condition
        if filter_clause is not None:
            flt_cl = ' and %s' % filter_clause
        else:
            flt_cl = ''
        get_st = 'select %s from %s where %s in (%s)%s' % (
            cols_st, quote(self.name), quote(self.id_column), rows_st, flt_cl)
    # ---- get content
    if debug:
        statements.append(get_st)
        return statements
    try:
        cs.execute(get_st)
    except dberror as e:
        raise Error(
            'Cannot select from table %s in database %s! (Reason: %s) (Cols: %s) (Rows: %s)' % (
                quote(self.name), quote(self.db_key), e, columns, rows))
    # hand the executed cursor back to the caller, as documented
    return cs
def getSubset(self, pkcID, forSamples='*', get_ssinfo=True, get_dataset=True):
    r"""
    Generate data subset for specific prior knowledge concept, and wrap it into
    :class:`~kdvs.fw.DataSet.DataSet` instance if requested. Optionally, only
    the information needed to create the subset manually can be generated,
    without the subset itself; this may be useful e.g. if data come from remote
    source that offers no complete control over querying.

    Parameters
    ----------
    pkcID : string
        identifier of prior knowledge concept for which the data subset will be
        generated
    forSamples : iterable/string
        samples that will be used to generate data subset; by default, prior
        knowledge is associated with individual measurements and treats samples
        as equal; this may be changed by specifying the individual samples to
        focus on (as list/tuple of strings) or specifying string '*' for
        considering all samples; '*' by default
    get_ssinfo : boolean
        if True, generate runtime information about the data subset and return
        it; True by default
    get_dataset : boolean
        if True, generate an instance of :class:`~kdvs.fw.DataSet.DataSet` that
        wraps the data subset and return it; True by default

    Returns
    -------
    ssinfo : dict/None
        runtime information as a dictionary of the following elements

            * 'dtable' -- :class:`~kdvs.fw.DBTable.DBTable` instance of the primary input data set
            * 'rows' -- row IDs for the subset (typically, measurement IDs)
            * 'cols' -- column IDs for the subset (typically, sample names)
            * 'pkcID' -- prior knowledge concept ID used to generate the subset;

        can be None if 'get_ssinfo' parameter was False
    subset_ds : :class:`~kdvs.fw.DataSet.DataSet`/None
        DataSet instance that holds the numerical information of the subset;
        can be None if 'get_dataset' parameter was False

    Raises
    ------
    Error
        if `forSamples` parameter value was incorrectly specified
    """
    # ---- resolve subset columns (samples)
    if forSamples == '*':
        sample_names = self.all_samples
    else:
        if not isListOrTuple(forSamples):
            raise Error('Non-empty list, tuple, or "*" expected! (got %s)' % (forSamples.__class__))
        sample_names = list(forSamples)
    # ---- resolve subset rows (variables)
    # TODO: variables ID sorting introduced for compatibility with V1.0
    variable_ids = list(self.pkcidmap.pkc2emid[pkcID])
    variable_ids.sort()
    # ---- runtime information, only if requested
    ssinfo = None
    if get_ssinfo:
        ssinfo = {
            'dtable': self.dtable,
            'rows': variable_ids,
            'cols': sample_names,
            'pkcID': pkcID,
        }
    # ---- wrapped data subset, only if requested
    subset_ds = None
    if get_dataset:
        subset_ds = DataSet(dbtable=self.dtable, cols=sample_names,
                            rows=variable_ids, remove_id_col=False)
    return ssinfo, subset_ds
def __init__(self, dbm, db_key, filehandle, dtname=None, delimiter=None, comment=None, header=None, make_missing_ID_column=True):
    r"""
    Parameters
    ----------
    dbm : :class:`~kdvs.core.db.DBManager`
        an instance of DB manager that is managing this table
    db_key : string
        internal ID of the table used by DB manager instance; it is NOT the name
        of physical database table in underlying RDBMS; typically, user of DB
        manager refers to the table by this ID and not by its physical name
    filehandle : file--like
        file handle to associated DSV file that contains the data that DSV table
        will hold; the file remains open but the data loading is deferred until
        requested
    dtname : string/None
        physical name of database table in underlying RDBMS; if None, the name is
        generated semi--randomly; NOTE: ordinary user of DB manager shall refer
        to the table with 'db_key' ID
    delimiter : string/None
        delimiter string of length 1 that should be used for parsing of DSV data;
        if None, the constructor tries to deduce delimiter by looking into first
        10 lines of associated DSV file; None by default; NOTE: giving explicit
        delimiter instead of deducing it dynamically greatly reduces possibility
        of errors during parsing DSV data
    comment : string/None
        comment prefix used in associated DSV file, or None if comments are not
        used; None by default
    header : list/tuple of string / None
        if header is present in the form of list/tuple of strings, it will be
        used as list of columns for the underlying database table; if None, the
        constructor tries to deduce the correct header by looking into first two
        lines of associated DSV file; None by default; NOTE: for well formed DSV
        files, header should be present, so it is relatively safe to deduce it
        automatically
    make_missing_ID_column : boolean
        used in connection with previous argument; sometimes one can encounter
        DSV files that contain NO first column name in the header (e.g. generated
        from various R functions), and while they contain correct data, such
        files are syntactically incorrect; if the constructor sees lack of the
        first column name, it can proceed according to this parameter; if True,
        it inserts the content of :data:`DSV_DEFAULT_ID_COLUMN` variable as the
        missing column name; if False, it inserts empty string "" as the missing
        column name; True by default

    Raises
    ------
    Error
        if proper comment string was not specified
    Error
        if underlying DSV dialect of associated DSV file has not been resolved
        correctly
    Error
        if delimiter has not been specified correctly
    Error
        if header iterable has not been specified correctly
    Error
        if parsing of DSV data during deducing was interrupted with an error;
        essentially, it reraises underlying csv.Error

    See Also
    --------
    csv
    """
    # ---- resolve comment
    if comment is not None and not isinstance(comment, basestring):
        # one-element tuple keeps the formatting safe when the offending value
        # is itself a tuple
        raise Error('String or None expected! (got %s)' % (comment,))
    else:
        self.comment = comment
    # ---- resolve delimiter and dialect
    if delimiter is None:
        # presumably sniffs delimiter and dialect from the file content; the
        # helper is defined outside of this block
        self._resolve_dialect(filehandle)
    else:
        if isinstance(delimiter, basestring) and len(delimiter) == 1:
            self.delimiter = delimiter
            try:
                self.dialect = _dialects[delimiter]
            except KeyError:
                raise Error(
                    'Dialect not identified for delimiter %s! (Sniffing required?)' % quote(delimiter))
        else:
            # one-element tuple: '(delimiter)' is not a tuple, and a tuple
            # delimiter would otherwise break the formatting with TypeError
            raise Error('Single character expected! (got %s)' % (delimiter,))
    # ---- resolve header
    if header is None:
        # presumably sets self.header, which is consumed below - confirm in helper
        self._extract_header(filehandle, make_missing_ID_column)
    else:
        if isListOrTuple(header):
            if len(header) == 0:
                # empty header given explicitly: let the helper produce one
                # (presumably sets self.header - confirm in helper)
                self._autogenerate_header(filehandle, make_missing_ID_column)
            else:
                self._verify_header(filehandle, header)
                self.header = header
        else:
            raise Error('List or tuple expected! (got %s)' % header.__class__)
    # ---- DSV analysis finished, initialize underlying instance
    self.handle = filehandle
    super(DSV, self).__init__(dbm, db_key, self.header, dtname)