Пример #1
0
    def select(self,
               table,
               columns='*',
               where=None,
               sql='',
               toDict=True,
               toArray=False,
               distinct=False,
               limit=None,
               offset=None):
        """
        Construct and execute a SELECT statement, returning the results.
        
        ============== ================================================================
        **Arguments:**
        table          The name of the table from which to read data
        columns        (list or str) List of column names to read from table. The default is '*', which reads all columns
                       If *columns* is given as a string, it is inserted verbatim into the SQL command.
                       If *columns* is given as a list, it is converted to a string of comma-separated, quoted names.
        where          Optional dict of {column: value} pairs. only results where column=value will be returned
        distinct       (bool) If true, omit all redundant results
        limit          (int) Limit the number of results that may be returned (best used with offset argument)
        offset         (int) Omit a certain number of results from the beginning of the list
        sql            Optional string to be appended to the SQL query (will be inserted before limit/offset arguments)
        toDict         If True, return a list-of-dicts (this is the default)
        toArray        if True, return a numpy record array
        ============== ================================================================
        """
        p = debug.Profiler("SqliteDatabase.select", disabled=True)
        if columns != '*' and not isinstance(columns, basestring):
            ## Quote each column name so names containing reserved words or
            ## odd characters are accepted; a literal '*' passes through unquoted.
            columns = ','.join(f if f == '*' else '"' + f + '"' for f in columns)

        whereStr = self._buildWhereClause(where, table)
        ## Each optional clause collapses to an empty string when unused.
        distinct = "distinct" if (distinct is True) else ""
        limit = ("limit %d" % limit) if (limit is not None) else ""
        offset = ("offset %d" % offset) if (offset is not None) else ""

        ## NOTE: table, columns and sql are interpolated verbatim into the
        ## command string -- callers must not pass untrusted input here.
        cmd = "SELECT %s %s FROM %s %s %s %s %s" % (
            distinct, columns, table, whereStr, sql, limit, offset)
        p.mark("generated command")
        q = self.exe(cmd, toDict=toDict, toArray=toArray)
        p.finish()
        return q
Пример #2
0
    def storeDBScan(self, scan, storeEvents=True):
        """Store all data for a scan, using cached values if possible.

        Arguments:
            scan        Scan object; must provide spots(), name(), getEvents()
                        and getStats() (see usage below).
            storeEvents (bool) If True, events previously stored for this scan
                        are cleared and freshly collected events are written.

        Raises HelpfulException if the user cancels the progress dialog.
        """
        p = debug.Profiler("Photostim.storeDBScan", disabled=True)

        if storeEvents:
            ## remove any events previously stored for this scan before re-storing
            self.clearDBScan(scan)

        with pg.BusyCursor():
            #dh = scan.source()
            #print "Store scan:", scan.source().name()
            events = []
            stats = []
            spots = scan.spots()
            with pg.ProgressDialog("Preparing data for %s" % scan.name(), 0,
                                   len(spots) + 1) as dlg:

                ## collect events and stats from all spots in the scan
                for i in xrange(len(spots)):
                    s = spots[i]
                    fh = self.dataModel.getClampFile(s.data())
                    try:
                        ev = scan.getEvents(fh)['events']
                        events.append(ev)
                    except:
                        ## dump the offending file handle / result for debugging,
                        ## then re-raise the original exception
                        print fh, scan.getEvents(fh)
                        raise
                    st = scan.getStats(s.data())
                    stats.append(st)
                    dlg.setValue(i)
                    if dlg.wasCanceled():
                        raise HelpfulException("Scan store canceled by user.",
                                               msgType='status')

                p.mark("Prepared data")

            dbui = self.getElement('Database')
            db = dbui.getDb()
            ## write events and stats atomically
            with db.transaction():
                ## Store all events for this scan
                if storeEvents:
                    ## drop spots that produced no events before concatenating
                    events = [x for x in events if len(x) > 0]

                    if len(events) > 0:
                        ev = np.concatenate(events)
                        p.mark("concatenate events")
                        self.detector.storeToDB(ev)
                        p.mark("stored all events")

                ## Store spot data
                self.storeStats(stats)
                p.mark("stored all stats")
                p.finish()
Пример #3
0
 def _readRecord(self, rec):
     """Convert a single query result row *rec* into an OrderedDict.

     Column order is preserved. Byte-array (buffer) values are assumed to
     hold pickled objects and are unpickled before being returned.
     """
     prof = debug.Profiler("_readRecord", disabled=True)
     data = collections.OrderedDict()
     ## pair each column name with its value by position
     for i, name in enumerate(rec.keys()):
         val = rec[i]
         ## Unpickle byte arrays into their original objects.
         ## (Hopefully they were stored as pickled data in the first place!)
         if isinstance(val, buffer):
             val = pickle.loads(str(val))
         data[name] = val
     prof.finish()
     return data
Пример #4
0
 def _queryToArray(self, q):
     """Convert query results *q* into a numpy record array.

     The dtype is inferred from the first record. Returns None when the
     query produced no rows (an empty array *with correct columns* would
     be needed, which is very difficult to construct).
     """
     prof = debug.Profiler("_queryToArray", disabled=True)
     recs = self._queryToDict(q)
     prof.mark("got records")
     if len(recs) < 1:
         return None
     ## infer column names / dtypes from the first record only
     dtype = functions.suggestRecordDType(recs[0], singleRecord=True)
     arr = np.empty(len(recs), dtype=dtype)
     ## fill every row the same way (no need to special-case record 0)
     for i, rec in enumerate(recs):
         arr[i] = tuple(rec.values())
     prof.mark('converted to array')
     prof.finish()
     return arr
Пример #5
0
 def select(self, table, columns='*', where=None, sql='', toDict=True, toArray=False, distinct=False, limit=None, offset=None):
     """Extends select to convert directory/file columns back into Dir/FileHandles. If the file doesn't exist, you will still get a handle, but it may not be the correct type.

     See SqliteDatabase.select for the meaning of the arguments.
     NOTE(review): the underlying query is always run with toDict=True, so the
     *toDict* argument is accepted for signature compatibility but the result
     is a list-of-dicts unless *toArray* is set -- confirm no caller relies
     on toDict=False here.
     """
     prof = debug.Profiler("AnalysisDatabase.select()", disabled=True)

     data = SqliteDatabase.select(self, table, columns, where=where, sql=sql, distinct=distinct, limit=limit, offset=offset, toDict=True, toArray=False)
     data = TableData(data)
     prof.mark("got data from SQliteDatabase")

     ## per-column configuration tells us which columns store dir/file links
     config = self.getColumnConfig(table)

     ## convert file/dir handles
     for column, conf in config.iteritems():
         if column not in data.columnNames():
             continue

         if conf.get('Type', '').startswith('directory'):
             ## column holds rowids into a linked directory table; map each
             ## distinct rowid to its DirHandle once, then remap the column
             ## (None stays None)
             rids = set([d[column] for d in data])
             linkTable = conf['Link']
             handles = dict([(rid, self.getDir(linkTable, rid)) for rid in rids if rid is not None])
             handles[None] = None
             data[column] = map(handles.get, data[column])

         elif conf.get('Type', None) == 'file':
             ## column holds a path relative to baseDir(); normalize the
             ## separator and look the handle up
             def getHandle(name):
                 if name is None:
                     return None
                 else:
                     if os.sep == '/':
                         sep = '\\'
                     else:
                         sep = '/'
                     name = name.replace(sep, os.sep) ## make sure file handles have an operating-system-appropriate separator (/ for Unix, \ for Windows)
                     return self.baseDir()[name]
             data[column] = map(getHandle, data[column])

     prof.mark("converted file/dir handles")

     ret = data.originalData()
     if toArray:
         ret = data.toArray()
         prof.mark("converted data to array")
     prof.finish()
     return ret
Пример #6
0
    def exe(self, cmd, data=None, batch=False, toDict=True, toArray=False):
        """Execute an SQL query. If data is provided, it should be a list of dicts and each will 
        be bound to the query and executed sequentially. Returns the query object.
        Arguments:
            cmd     - The SQL query to execute
            data    - List of dicts, one per record to be processed
                      For each record, data is bound to the query by key name
                      {"key1": "value1"}  =>  ":key1"="value1"
            batch   - If True, then all input data is processed in a single execution.
                      In this case, data must be provided as a dict-of-lists or record array.
            toDict  - If True, return a list-of-dicts representation of the query results
            toArray - If True, return a record array representation of the query results
        """
        p = debug.Profiler('SqliteDatabase.exe', disabled=True)
        p.mark('Command: %s' % cmd)

        if data is None:
            cur = self.db.execute(cmd)
            p.mark("Executed with no data")
        else:
            data = TableData(data)
            if batch:
                cur = self.db.executemany(cmd, data.__iter__())
            else:
                for d in data:
                    p.mark("bound values for record")
                    ## BUGFIX: capture the cursor. Previously the result of
                    ## execute() was discarded, leaving *cur* unbound and
                    ## crashing the toDict/toArray conversion below with an
                    ## UnboundLocalError.
                    cur = self.db.execute(cmd, d)
                    p.mark("executed with data")

        if cmd is not None:
            ## creating a table invalidates the cached table list
            if str(cmd)[:6].lower() == 'create':
                self.tables = None  ## clear table cache

        if toArray:
            ret = self._queryToArray(cur)
        elif toDict:
            ret = self._queryToDict(cur)
        else:
            ret = cur
        p.finish()
        return ret
Пример #7
0
    def storeToDB(self, data=None):
        """Store detected events to the database table owned by this detector.

        Arguments:
            data   Record array of events; if None, events are pulled from the
                   flowchart output. If empty, any previously stored events for
                   the current file's protocol are deleted instead.

        Raises Exception when no database is selected, and HelpfulException if
        the user cancels the insert progress dialog.
        """
        p = debug.Profiler("EventDetector.storeToDB", disabled=True)

        if data is None:
            data = self.flowchart.output()['events']

        dbui = self.getElement('Database')
        table = dbui.getTableName(self.dbIdentity)
        db = dbui.getDb()
        if db is None:
            raise Exception("No DB selected")

        p.mark("DB prep done")

        if len(data) == 0:
            ## if there is no event data, then we need to delete previous event data

            dh = self.currentFile.name(relativeTo=db.baseDir())
            ## strip a trailing clamp-file name to get the protocol directory
            if dh[-10:] == '/Clamp1.ma' or dh[-10:] == '/Clamp2.ma':
                dh = dh[:-10]
            ## NOTE(review): dh is interpolated into the SQL string directly --
            ## safe only as long as directory names never contain quotes.
            protocolID = db(
                'Select rowid, Dir from DirTable_Protocol where Dir="%s"' % dh)
            if len(protocolID) > 0:
                protocolID = protocolID[0]['rowid']
            else:
                return
            db('Delete from %s where ProtocolDir=%i' % (table, protocolID))
            return

        ## determine the set of fields we expect to find in the table
        columns = db.describeData(data)
        ## link columns added on top of the data's own fields
        columns.update({
            'ProtocolSequenceDir': 'directory:ProtocolSequence',
            'ProtocolDir': 'directory:Protocol',
        })

        p.mark("field list done")

        with db.transaction():
            ## Make sure target table exists and has correct columns, links to input file
            db.checkTable(table,
                          owner=self.dbIdentity,
                          columns=columns,
                          create=True,
                          addUnknownColumns=True,
                          indexes=[['SourceFile'], ['ProtocolSequenceDir']])

            p.mark("data prepared")

            ## collect all protocol/Sequence dirs
            prots = {}
            seqs = {}
            for fh in set(data['SourceFile']):
                prots[fh] = fh.parent()
                seqs[fh] = self.dataModel.getParent(fh, 'ProtocolSequence')

            ## delete all records from table for current input files
            for fh in set(data['SourceFile']):
                db.delete(table, where={'SourceFile': fh})
            p.mark("previous records deleted")

            ## assemble final list of records
            records = {}
            for col in data.dtype.names:
                records[col] = data[col]
            records['ProtocolSequenceDir'] = map(seqs.get, data['SourceFile'])
            records['ProtocolDir'] = map(prots.get, data['SourceFile'])

            p.mark("record list assembled")

            ## insert all data to DB
            with pg.ProgressDialog("Storing events...", 0, 100) as dlg:
                for n, nmax in db.iterInsert(table, records, chunkSize=50):
                    dlg.setMaximum(nmax)
                    dlg.setValue(n)
                    if dlg.wasCanceled():
                        raise HelpfulException("Scan store canceled by user.",
                                               msgType='status')
            p.mark("records inserted")
            p.finish()
Пример #8
0
 def _queryToDict(self, q):
     """Return query results *q* as a list of OrderedDicts, one per row."""
     prof = debug.Profiler("_queryToDict", disabled=True)
     res = [self._readRecord(rec) for rec in q]
     ## BUGFIX: the profiler was never finished here, unlike the sibling
     ## _readRecord / _queryToArray methods.
     prof.finish()
     return res
Пример #9
0
    def iterInsert(self,
                   table,
                   records=None,
                   replaceOnConflict=False,
                   ignoreExtraColumns=False,
                   chunkSize=500,
                   chunkAll=False,
                   **args):
        """
        Iteratively insert chunks of data into a table while yielding a tuple (n, max)
        indicating progress. This *must* be used inside a for loop::
        
            for n,nmax in db.iterInsert(table, data):
                print "Insert %d%% complete" % (100. * n / nmax)
        
        Use the chunkSize argument to determine how many records are inserted per iteration.
        See insert() for a description of all other options.
        """

        p = debug.Profiler("SqliteDatabase.insert", disabled=True)
        if records is None:
            ## a single record may be supplied via keyword arguments instead
            records = [args]
        if len(records) == 0:
            return

        with self.transaction():
            ## Remember that _prepareData may change the number of columns!
            records = TableData(
                self._prepareData(table,
                                  records,
                                  ignoreUnknownColumns=ignoreExtraColumns,
                                  batch=True))
            p.mark("prepared data")

            columns = records.keys()
            insert = "INSERT"
            if replaceOnConflict:
                insert += " OR REPLACE"
            ## e.g.  INSERT INTO tbl ("a","b") VALUES (:a,:b)
            cmd = "%s INTO %s (%s) VALUES (%s)" % (
                insert, table, quoteList(columns), ','.join(
                    [':' + f for f in columns]))

            numRecs = len(records)
            if chunkAll:  ## insert all records in one go.
                self.exe(cmd, records, batch=True)
                yield (numRecs, numRecs)
                return

            chunkSize = int(chunkSize)  ## just make sure
            offset = 0
            while offset < len(records):
                chunk = records[offset:offset + chunkSize]
                self.exe(cmd, chunk, batch=True)
                offset += len(chunk)
                yield (offset, numRecs)
            p.mark("Transaction done")

        p.finish()