Exemplo n.º 1
0
def get_genes_ids(db, location=None, use_strand=False, overlap=True):
    """
    Get ensembl ids for genes.

    The query is assembled from ``SQL_GENES_SIMPLE``, the clause produced by
    ``location_to_sql`` and ``SQL_GENES_SIMPLE_ORDER_BY``, then executed
    against the SQLite database ``db``.

    :param db: database argument handed to ``sqlite3.connect``
    :param location: forwarded unchanged to ``location_to_sql``
    :param use_strand: forwarded unchanged to ``location_to_sql``
    :param overlap: forwarded unchanged to ``location_to_sql``
    :return: list with the ``ensembl_id`` column of every row, in query order
    """

    sql, sql_parameters = location_to_sql(location, use_strand, overlap)
    sql = "{0} {1} {2}".format(SQL_GENES_SIMPLE, sql, SQL_GENES_SIMPLE_ORDER_BY)

    LOG.debug("SQL:\n{0}".format(sql))
    LOG.debug("PARAMETERS: {0}".format(sql_parameters))

    conn = sqlite3.connect(db)
    # try/finally guarantees the connection and cursor are released even when
    # the query or the row access raises.
    try:
        sqlite3.enable_callback_tracebacks(True)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        try:
            cursor.execute(sql, sql_parameters)
            return [r['ensembl_id'] for r in cursor]
        finally:
            cursor.close()
    finally:
        conn.close()
Exemplo n.º 2
0
def get_genes_simple(db, location=None, use_strand=False, overlap=True):
    """
    Get genes as ``Gene`` objects.

    The query is assembled from ``SQL_GENES_SIMPLE``, the clause produced by
    ``location_to_sql`` and ``SQL_GENES_SIMPLE_ORDER_BY``, then executed
    against the SQLite database ``db``.

    :param db: database argument handed to ``sqlite3.connect``
    :param location: forwarded unchanged to ``location_to_sql``
    :param use_strand: forwarded unchanged to ``location_to_sql``
    :param overlap: forwarded unchanged to ``location_to_sql``
    :return: list of ``Gene`` objects, one per row and in query order, built
        from the ``ensembl_id``, ``seqid``, ``start``, ``end`` and ``strand``
        columns
    """

    sql, sql_parameters = location_to_sql(location, use_strand, overlap)
    sql = "{0} {1} {2}".format(SQL_GENES_SIMPLE, sql,
                               SQL_GENES_SIMPLE_ORDER_BY)

    LOG.debug("SQL:\n{0}".format(sql))
    LOG.debug("PARAMETERS: {0}".format(sql_parameters))

    conn = sqlite3.connect(db)
    # try/finally guarantees the connection and cursor are released even when
    # the query or the Gene construction raises.
    try:
        sqlite3.enable_callback_tracebacks(True)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        try:
            cursor.execute(sql, sql_parameters)
            return [
                Gene(r['ensembl_id'], r['seqid'], r['start'], r['end'],
                     r['strand'])
                for r in cursor
            ]
        finally:
            cursor.close()
    finally:
        conn.close()
Exemplo n.º 3
0
def get_transcripts_simple(db, location=None, use_strand=False, overlap=True):
    """
    Get transcripts together with their exons.

    The query is assembled from ``SQL_TRANSCRIPTS_SIMPLE``, the clause
    produced by ``location_to_sql`` and ``SQL_TRANSCRIPTS_SIMPLE_ORDER_BY``.
    A row whose ``transcript_id`` equals its ``child_id`` describes a
    transcript; every other row describes an exon of that transcript.

    :param db: database argument handed to ``sqlite3.connect``
    :param location: forwarded unchanged to ``location_to_sql``
    :param use_strand: forwarded unchanged to ``location_to_sql``
    :param overlap: forwarded unchanged to ``location_to_sql``
    :return: list of ``Transcript`` objects in the order the query first
        yielded them; each one's ``exons`` mapping is filled by exon id
    :raises KeyError: if an exon row names a transcript for which no
        transcript row was returned
    """
    sql, sql_parameters = location_to_sql(location, use_strand, overlap)
    sql = "{0} {1} {2}".format(SQL_TRANSCRIPTS_SIMPLE, sql,
                               SQL_TRANSCRIPTS_SIMPLE_ORDER_BY)

    LOG.debug("SQL:\n{0}".format(sql))
    LOG.debug("PARAMETERS: {0}".format(sql_parameters))

    transcripts = OrderedDict()
    exons = OrderedDict()

    conn = sqlite3.connect(db)
    # try/finally guarantees the connection and cursor are released even when
    # the query or the row handling raises.
    try:
        sqlite3.enable_callback_tracebacks(True)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        try:
            cursor.execute(sql, sql_parameters)

            for r in cursor:
                if r['transcript_id'] == r['child_id']:
                    # transcript
                    if r['_key'] not in transcripts:
                        transcripts[r['_key']] = Transcript(
                            r['transcript_id'],
                            r['transcript_seqid'],
                            r['transcript_start'],
                            r['transcript_end'],
                            r['transcript_strand'])
                    continue

                # exon: build the object only the first time its key is seen
                exon_key = r['_child_key']
                if exon_key in exons:
                    exon = exons[exon_key]
                else:
                    exon = Exon(r['child_id'], r['child_seqid'],
                                r['child_start'], r['child_end'],
                                r['child_strand'])
                    exons[exon_key] = exon

                # NOTE(review): gene_id receives the transcript id here, as
                # in the original code - confirm this is intended.
                exon.gene_id = r['transcript_id']
                exon.transcript_ids[r['transcript_id']] = r['transcript_id']

                if r['gtf_attribute'] == 'exon_number':
                    exon.exon_number = r['value']
        finally:
            cursor.close()
    finally:
        conn.close()

    # Re-key by ensembl id while keeping the order produced by the query.
    by_ensembl_id = OrderedDict(
        (transcript.ensembl_id, transcript)
        for transcript in transcripts.values())

    for exon in exons.values():
        for _tid in exon.transcript_ids:
            by_ensembl_id[_tid].exons[exon.ensembl_id] = exon

    return list(by_ensembl_id.values())
Exemplo n.º 4
0
def get_transcripts_simple(db, location=None, use_strand=False, overlap=True):
    """
    Get transcripts together with their exons.

    The query is assembled from ``SQL_TRANSCRIPTS_SIMPLE``, the clause
    produced by ``location_to_sql`` and ``SQL_TRANSCRIPTS_SIMPLE_ORDER_BY``.
    A row whose ``transcript_id`` equals its ``child_id`` describes a
    transcript; every other row describes an exon of that transcript.

    :param db: database argument handed to ``sqlite3.connect``
    :param location: forwarded unchanged to ``location_to_sql``
    :param use_strand: forwarded unchanged to ``location_to_sql``
    :param overlap: forwarded unchanged to ``location_to_sql``
    :return: list of ``Transcript`` objects in the order the query first
        yielded them; each one's ``exons`` mapping is filled by exon id
    :raises KeyError: if an exon row names a transcript for which no
        transcript row was returned
    """
    sql, sql_parameters = location_to_sql(location, use_strand, overlap)
    sql = "{0} {1} {2}".format(SQL_TRANSCRIPTS_SIMPLE, sql, SQL_TRANSCRIPTS_SIMPLE_ORDER_BY)

    LOG.debug("SQL:\n{0}".format(sql))
    LOG.debug("PARAMETERS: {0}".format(sql_parameters))

    transcripts = OrderedDict()
    exons = OrderedDict()

    conn = sqlite3.connect(db)
    # try/finally guarantees the connection and cursor are released even when
    # the query or the row handling raises.
    try:
        sqlite3.enable_callback_tracebacks(True)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        try:
            cursor.execute(sql, sql_parameters)

            for r in cursor:
                if r['transcript_id'] == r['child_id']:
                    # transcript
                    if r['_key'] not in transcripts:
                        transcripts[r['_key']] = Transcript(r['transcript_id'], r['transcript_seqid'], r['transcript_start'], r['transcript_end'], r['transcript_strand'])
                    continue

                # exon: build the object only the first time its key is seen
                exon_key = r['_child_key']
                if exon_key in exons:
                    exon = exons[exon_key]
                else:
                    exon = Exon(r['child_id'], r['child_seqid'], r['child_start'], r['child_end'], r['child_strand'])
                    exons[exon_key] = exon

                # NOTE(review): gene_id receives the transcript id here, as
                # in the original code - confirm this is intended.
                exon.gene_id = r['transcript_id']
                exon.transcript_ids[r['transcript_id']] = r['transcript_id']

                if r['gtf_attribute'] == 'exon_number':
                    exon.exon_number = r['value']
        finally:
            cursor.close()
    finally:
        conn.close()

    # Re-key by ensembl id while keeping the order produced by the query.
    by_ensembl_id = OrderedDict((transcript.ensembl_id, transcript) for transcript in transcripts.values())

    for exon in exons.values():
        for _tid in exon.transcript_ids:
            by_ensembl_id[_tid].exons[exon.ensembl_id] = exon

    return list(by_ensembl_id.values())
    def doSearch(self, commands, center, viewBounds = None):        
        results = None
        
        db = sqlite.connect(self.layer.filename)
        db.row_factory = sqlite.Row

        db.create_function("GeoDistanceSphere",4,GeoDistanceSphere)
        sqlite.enable_callback_tracebacks(True)
        
        print "Search Query:", commands
        
        # We assume that all the tables have the same tag set
        knownTags = [x[1] for x in db.execute("pragma table_info(world_point)")]
        del knownTags[knownTags.index("way")]
        del knownTags[knownTags.index("way_area")]
        del knownTags[knownTags.index("z_order")]
        
        parser = SearchParse.SearchParser(knownTags)
        tokens = parser.parse(commands)
        
        print "Parsed search:", str(tokens)
        
        query = parsedToSQLite(tokens, center, viewBounds)
        
        print "SQL:", query
        
        results = list()
        
        #FIXME: including spatial_idx without a proper join causes crazy duplicates, this hacks around that
        if "spatial_idx" in query:
            sql_queries = [
                "select %(tags)s, ST_AsText(Transform(way, 4326)) as point, ST_AsText(Transform(way, 4326)) as geom, 'point' as type from %(mapName)s_point as osm, idx_%(mapName)s_point_way as spatial_idx where %(query)s",
                "select %(tags)s, ST_AsText(Transform(ST_StartPoint(way), 4326)) as point, ST_AsText(Transform(way, 4326)) as geom, 'line' as type from %(mapName)s_line as osm, idx_%(mapName)s_line_way as spatial_idx where %(query)s",
                "select %(tags)s, ST_AsText(Transform(ST_Centroid(way), 4326)) as point, ST_AsText(Transform(way, 4326)) as geom, 'polygon' as type from %(mapName)s_polygon as osm, idx_%(mapName)s_polygon_way as spatial_idx where %(query)s",
            ]
        else:
            sql_queries = [
                "select %(tags)s, ST_AsText(Transform(way, 4326)) as point, ST_AsText(Transform(way, 4326)) as geom, 'point' as type from %(mapName)s_point as osm where %(query)s",
                "select %(tags)s, ST_AsText(Transform(ST_StartPoint(way), 4326)) as point, ST_AsText(Transform(way, 4326)) as geom, 'line' as type from %(mapName)s_line as osm where %(query)s",
                "select %(tags)s, ST_AsText(Transform(ST_Centroid(way), 4326)) as point, ST_AsText(Transform(way, 4326)) as geom, 'polygon' as type from %(mapName)s_polygon as osm where %(query)s",
            ]
        
        tag_query = ",".join(["\"%s\"" % t for t in knownTags])
        for sql in sql_queries:
            for row in db.execute(sql % {"mapName":"world", "query":query, "tags":tag_query}):
                loc = row["point"]
                try:
                    loc = loc.split("(")[1].split(")")[0].split(" ")
                except Exception, e:
                    print "Bad geometry (%s) for \"%s\": %s" % (str(e), row["name"], loc)
                    break
                loc = map(float, loc)
                
                distance = GeoDistanceSphere(center.x, center.y, loc[0], loc[1])
                result = {"type":row["type"], "name":row["name"], "loc":loc, "distance":distance}
        
                if row["type"] == "line":
                    if not row["geom"]:
                        print "Bad geometry for \"%s\": %s" % (row["name"], loc)
                        continue
                    try:
                        points = row["geom"].split("(")[1].split(")")[0].split(",")
                        points = [map(float, p.strip().split(" ")) for p in points]
                    except IndexError:
                        print "Bad geometry for \"%s\": %s" % (row["name"], loc)
                    except:
                        print row["geom"]
                        raise
                    result["line"] = points
                
                description = "\n".join(["%s: %s" % (t, row[str(t)]) for t in knownTags if row[str(t)]])
                result["description"] = description
                
                results.append(result)
Exemplo n.º 6
0
from twisted.trial import unittest

# Refuse to load when the sqlite module reports a version below 2.1.
assert (2, 1) <= sqlite.version_info

class DBExcept(Exception):
    """Signals that accessing the database failed."""

class khash(str):
    """Marker subclass of str for hashes, so they are stored in the DB as base64."""
    pass

# Ask the sqlite module to print tracebacks for errors raised in callbacks.
sqlite.enable_callback_tracebacks(True)
# Teach the sqlite module about 'khash' objects (binary strings): columns
# declared as khash/KHASH are passed through a2b_base64 when read, and khash
# values are passed through b2a_base64 when written.
sqlite.register_converter("khash", a2b_base64)
sqlite.register_converter("KHASH", a2b_base64)
sqlite.register_adapter(khash, b2a_base64)

class DB:
    """An sqlite database for storing persistent files and hashes.
    
    @type db: L{twisted.python.filepath.FilePath}
    @ivar db: the database file to use
    @type conn: L{pysqlite2.dbapi2.Connection}
    @ivar conn: an open connection to the sqlite database
    """
    
    def __init__(self, db):
        """Load or create the database file.
        
        @type db: L{twisted.python.filepath.FilePath}
        @param db: the database file to use
Exemplo n.º 7
0

class DBExcept(Exception):
    """Signals that accessing the database failed."""


class khash(str):
    """Marker subclass of str for hashes, so they are stored in the DB as base64."""
    pass


# Ask the sqlite module to print tracebacks for errors raised in callbacks.
sqlite.enable_callback_tracebacks(True)
# Teach the sqlite module about 'khash' objects (binary strings): columns
# declared as khash/KHASH are passed through a2b_base64 when read, and khash
# values are passed through b2a_base64 when written.
sqlite.register_converter("khash", a2b_base64)
sqlite.register_converter("KHASH", a2b_base64)
sqlite.register_adapter(khash, b2a_base64)


class DB:
    """An sqlite database for storing persistent files and hashes.
    
    @type db: L{twisted.python.filepath.FilePath}
    @ivar db: the database file to use
    @type conn: L{pysqlite2.dbapi2.Connection}
    @ivar conn: an open connection to the sqlite database
    """
    def __init__(self, db):
        """Load or create the database file.
        
        @type db: L{twisted.python.filepath.FilePath}
        @param db: the database file to use