示例#1
0
def findGeneNameGenomePairsLike(release, substring, searchType=CONTAINS_TYPE, conn=None):
    '''
    substring: search for gene names containing this string somehow.
    searchType: specify how the gene name should contain substring
    genomes names are like Homo_sapiens.aa
    returns: list of unique pairs of gene name and genome names for every gene name containing substring according to the searchType
    mapped to all genomes that contain a seq id that has that gene name.
    '''
    sql = ' SELECT DISTINCT rs.gene_name, rg.acc'
    sql += ' FROM {} rs JOIN {} rg '.format(releaseTable(release, 'sequence'), releaseTable(release, 'genomes'))
    sql += ' WHERE rs.gene_name LIKE %s'
    if searchType == CONTAINS_TYPE:
        args = ['%' + substring + '%']
    elif searchType == STARTS_WITH_TYPE:
        args = [substring + '%']
    elif searchType == ENDS_WITH_TYPE:
        args = ['%' + substring]
    elif searchType == EQUALS_TYPE:
        args = [substring]
    else:
        raise Exception('Unrecognized searchType.  searchType=%s'%searchType)
    sql += ' AND rs.genome_id = rg.id '
    sql += ' ORDER BY rg.acc, rs.gene_name '
    
    # results are a tuple of tuples which containing an gene name and genome and genome name.
    with connCM(conn=conn) as conn:
        return [tuple(row) for row in dbutil.selectSQL(sql=sql, args=args, conn=conn)]
示例#2
0
def numOrthologs(release, conn=None):
    num = 0
    with connCM(conn=conn) as conn:
        sql = 'SELECT num_orthologs FROM {}'.format(releaseTable(release, 'results'))
        rows = dbutil.selectSQL(sql=sql, conn=conn)
        num = sum(row[0] for row in rows)
    return num
示例#3
0
 def exists(self, key):
     encodedKey = json.dumps(key)
     with self.manager as conn:
         sql = 'SELECT id FROM ' + self.table + ' WHERE name = %s'
         results = dbutil.selectSQL(conn, sql, args=[encodedKey])
         return bool(
             results)  # True if there are any results, False otherwise.
示例#4
0
def selectOne(conn, sql, args=None):
    '''
    helper function that selects the first column of the first row
    assumption: there is at least one row with one column.  expect an error otherwise.
    '''
    with connCM(conn=conn) as conn:
        return dbutil.selectSQL(sql=sql, conn=conn, args=args)[0][0]
示例#5
0
def getGenomesAndNames(release):
    '''
    returns a list of pairs of genome (e.g. MYCGE) and name (e.g. Mycoplasma genitalium)
    '''
    sql = 'SELECT acc, name FROM {}'.format(releaseTable(release, 'genomes'))
    logging.debug(sql)
    with connCM() as conn:
        return dbutil.selectSQL(sql=sql, conn=conn)
示例#6
0
 def get(self, key, default=None):
     encodedKey = json.dumps(key)
     with self.manager as conn:
         sql = 'SELECT value FROM ' + self.table + ' WHERE name = %s'
         results = dbutil.selectSQL(conn, sql, args=[encodedKey])
         if results:
             value = json.loads(results[0][0])
         else:
             value = default
         return value
示例#7
0
 def get(self, key, default=None):
     encodedKey = json.dumps(key)
     with self.connect() as conn:
         sql = 'SELECT value FROM ' + self.table + ' WHERE name = %s'
         results = dbutil.selectSQL(conn, sql, args=[encodedKey])
         if results:
             value = json.loads(results[0][0])
         else:
             value = default
         return value
示例#8
0
def getGenomesData(release):
    '''
    returns a list of tuples, one for each genome, of acc, name, ncbi_taxon, taxon_category_code,
    taxon_category_name, and num_seqs.
    '''
    sql = '''SELECT acc, name, ncbi_taxon, taxon_category_code, taxon_category_name, num_seqs
    FROM {}'''.format(releaseTable(release, 'genomes'))
    # logging.debug(sql)
    with connCM() as conn:
        return dbutil.selectSQL(sql=sql, conn=conn)
示例#9
0
    def get(self, key, default=None):
        sql = " SELECT value FROM " + self.table + " WHERE id=%s"
        with self.manager as conn:
            results = dbutil.selectSQL(conn, sql, args=[self._cache_hash(key)])
            if results:
                value = json.loads(results[0][0])
            else:
                value = default

            # update access time
            sql = "UPDATE " + self.table + " SET access_time=NOW() WHERE id=%s"
            with dbutil.doTransaction(conn):
                dbutil.executeSQL(conn, sql, args=[self._cache_hash(key)])

            return value
示例#10
0
def getOrthologs(release, qdb, sdb, divergence='0.2', evalue='1e-20', conn=None):
    '''
    divergence: ortholog must have this divergence.  defaults to 0.2
    evalue: ortholog must have this evalue.  defaults to 1e-20.
    '''
    with connCM(conn=conn) as conn:
        qdbId = getIdForGenome(release, qdb, conn)
        sdbId = getIdForGenome(release, sdb, conn)
        divId = getIdForDivergence(release, divergence, conn)
        evalueId = getIdForEvalue(release, evalue, conn)
        sql = 'SELECT rr.orthologs '
        sql += ' FROM {} rr'.format(releaseTable(release, 'results'))
        sql += ' WHERE rr.query_db = %s AND rr.subject_db = %s AND rr.divergence = %s AND rr.evalue = %s '
        args = [qdbId, sdbId, divId, evalueId]
        logging.debug('sql='+sql)
        logging.debug('args='+str(args))
        rows = dbutil.selectSQL(sql=sql, args=args, conn=conn)
        return decodeOrthologs(rows[0][0])
示例#11
0
def getSeqIdsForGeneName(release, geneName, genome=None, conn=None):
    '''
    geneName: gene name or symbol, e.g. 'acsC'
    genome: genome id, e.g. 'Homo_sapiens.aa'.  If None, all seq ids with the gene name are returned.
    conn: db connection to use.  If None, one will be created.
    returns: list of sequence ids, i.e. a GI, ensembl id, etc., which have the given gene name.
    '''
    with connCM(conn=conn) as conn:
        sql = 'SELECT DISTINCT rs.external_sequence_id '
        sql += ' FROM {} rs '.format(releaseTable(release, 'sequence'))
        sql += ' WHERE rs.gene_name = %s '
        params = [geneName]
        if genome:
            dbId = getIdForGenome(release, genome, conn)
            sql += ' AND rs.genome_id = %s '
            params.append(dbId)

        # results are a tuple of tuples which containing an id.  convert to a list of ids.
        return [row[0] for row in dbutil.selectSQL(sql=sql, args=params, conn=conn)]
示例#12
0
 def _readUnhandled(self):
     '''
     Reads the next message from the queue.
     Returns: message_id, message.
     Use message_id to delete() the message when done or to changeTimeout() of the message if necessary.
     '''
     with self.manager as conn:
         with dbutil.doTransaction(conn):
             # read first available message (pending or lock timeout)
             sql = 'SELECT id, message FROM message_queue WHERE queue = %s AND (NOT locked OR  lock_time < CURRENT_TIMESTAMP)'
             sql += ' ORDER BY id ASC LIMIT 1 FOR UPDATE '
             results = dbutil.selectSQL(conn, sql, args=[self.queue])
             if results:
                 id, message = results[0]
                 # mark message unavailable for reading for timeout seconds.
                 sql = 'UPDATE message_queue SET locked = TRUE, read_time = CURRENT_TIMESTAMP, lock_time = ADDTIME(CURRENT_TIMESTAMP, SEC_TO_TIME(timeout)) WHERE id = %s'
                 dbutil.executeSQL(conn, sql, args=[id])
                 return id, message
             else:
                 raise EmptyQueueError(str(self.queue))
示例#13
0
def findGeneNamesLike(release, substring, searchType=CONTAINS_TYPE, conn=None):
    '''
    substring: search for gene names containing this string somehow.
    searchType: specify how the gene name should contain substring
    returns: list of every gene name containing substring according to the searchType.
    '''
    sql = ' SELECT DISTINCT rs.gene_name FROM {} rs '.format(releaseTable(release, 'sequence'))
    sql += ' WHERE rs.gene_name LIKE %s'
    if searchType == CONTAINS_TYPE:
        args = ['%' + substring + '%']
    elif searchType == STARTS_WITH_TYPE:
        args = [substring + '%']
    elif searchType == ENDS_WITH_TYPE:
        args = ['%' + substring]
    elif searchType == EQUALS_TYPE:
        args = [substring]
    else:
        raise Exception('Unrecognized searchType.  searchType=%s'%searchType)

    # results are a tuple of tuples which containing an id.  convert to a list of ids.
    with connCM(conn=conn) as conn:
        return [row[0] for row in dbutil.selectSQL(sql=sql, args=args, conn=conn)]
示例#14
0
def getIdForName(name, table, conn=None):
    sql = 'SELECT id FROM {} WHERE name=%s'.format(table)
    with connCM(conn=conn) as conn:
        rowset = dbutil.selectSQL(sql=sql, conn=conn, args=[name])
        return rowset[0][0]
示例#15
0
def getNameForId(id, table, conn=None):
    sql = 'SELECT name FROM {} WHERE id=%s'.format(table)
    with connCM(conn=conn) as conn:
        rowset = dbutil.selectSQL(sql=sql, conn=conn, args=[id])
        return rowset[0][0]
示例#16
0
def getGenomeToId(release):
    sql = 'select acc, id from {}'.format(releaseTable(release, 'genomes'))
    with connCM() as conn:
        return dict(dbutil.selectSQL(sql=sql, conn=conn))
示例#17
0
def getEvalueToId(release):
    sql = 'select name, id from {}'.format(releaseTable(release, 'evalues'))
    with connCM() as conn:
        return dict(dbutil.selectSQL(sql=sql, conn=conn))
示例#18
0
def getSequenceToId(release):
    sql = 'select external_sequence_id, id from {}'.format(releaseTable(release, 'sequence'))
    with connCM() as conn:
        return dict(dbutil.selectSQL(sql=sql, conn=conn))
示例#19
0
 def selectSequenceDataRows():
     return dbutil.selectSQL(sql=sql, conn=conn)
示例#20
0
 def selectTermsRows():
     return dbutil.selectSQL(sql=sql, conn=conn)
示例#21
0
 def exists(self, key):
     encodedKey = json.dumps(key)
     with self.connect() as conn:
         sql = 'SELECT id FROM ' + self.table + ' WHERE name = %s'
         results = dbutil.selectSQL(conn, sql, args=[encodedKey])
         return bool(results) # True if there are any results, False otherwise.
示例#22
0
 def has_key(self, key):
     sql = " SELECT id FROM " + self.table + " WHERE id=%s"
     with self.manager as conn:
         results = dbutil.selectSQL(conn, sql, args=[self._cache_hash(key)])
         foundKey = bool(results)
         return foundKey