def test_attach(tmp_path):
    """Check the GenomicSQLite helpers against a table in an attached database.

    The main database is populated through ``_fill_exons`` (called with
    ``gri=False``), a second database file is attached under the schema name
    ``db2``, and ``exons`` is copied there as ``db2.exons2``.  A genomic range
    index and a reference assembly are then created inside ``db2``, and an
    overlap self-join between ``exons`` and ``db2.exons2`` is expected to
    return 5191 rows.
    """
    main_path = str(tmp_path / "test.gsql")
    attached_path = str(tmp_path / "test_attached.gsql")

    con = genomicsqlite.connect(main_path, unsafe_load=True)
    _fill_exons(con, gri=False)
    con.commit()

    # Attach the second file as schema "db2" and clone the exons table into it.
    attach_sql = genomicsqlite.attach_sql(
        con, attached_path, "db2", unsafe_load=True
    )
    con.executescript(attach_sql)
    con.executescript("CREATE TABLE db2.exons2 AS SELECT * FROM exons")

    # Index the copy, then store the reference assembly in the attached schema.
    index_sql = genomicsqlite.create_genomic_range_index_sql(
        con, "db2.exons2", "rid", "beg", "end"
    )
    con.executescript(index_sql)
    assembly_sql = genomicsqlite.put_reference_assembly_sql(
        con, "GRCh38_no_alt_analysis_set", schema="db2"
    )
    con.executescript(assembly_sql)

    # Join each exon in the main table to the rows of db2.exons2 selected by
    # the generated rowid subquery, excluding the exon itself.
    rowids_sql = genomicsqlite.genomic_range_rowids_sql(
        con, "db2.exons2", "exons.rid", "exons.beg", "exons.end"
    )
    select_head = "SELECT exons.id, db2.exons2.id FROM exons LEFT JOIN db2.exons2 ON db2.exons2._rowid_ IN\n"
    select_tail = " AND exons.id != db2.exons2.id ORDER BY exons.id, db2.exons2.id"
    rows = list(con.execute(select_head + rowids_sql + select_tail))
    assert len(rows) == 5191

    # The reference sequences must be readable from the attached schema too.
    by_name = genomicsqlite.get_reference_sequences_by_name(con, schema="db2")
    assert len(by_name) > 24
def test_join():
    """Compare a GRI-driven overlap join with a brute-force control join.

    The scenario runs twice, passing ``len_gri`` as ``False`` and then ``True``
    to the ``_fill_exons`` call that builds ``exons2``.  Each run checks that
    the query plan uses the range-index constraint exactly twice, that the
    join yields 5191 rows of which 5 have no partner, and that the rows equal
    those of an unindexed join written with plain interval comparisons.
    """
    # Query-plan detail text expected when the genomic range index is used.
    gri_constraint = "((_gri_rid,_gri_lvl,_gri_beg)>(?,?,?) AND (_gri_rid,_gri_lvl,_gri_beg)<(?,?,?))"

    for len_gri in (False, True):
        con = sqlite3.connect(":memory:")
        assembly_sql = genomicsqlite.put_reference_assembly_sql(
            con, "GRCh38_no_alt_analysis_set"
        )
        con.executescript(assembly_sql)
        _fill_exons(con, table="exons")
        _fill_exons(con, floor=2, table="exons2", len_gri=len_gri)
        con.commit()

        rowids_sql = genomicsqlite.genomic_range_rowids_sql(
            con, "exons2", "exons.rid", "exons.beg", "exons.end"
        )
        query = (
            "SELECT exons.id, exons2.id FROM exons LEFT JOIN exons2 ON exons2._rowid_ IN\n"
            + rowids_sql
            + " AND exons.id != exons2.id ORDER BY exons.id, exons2.id"
        )
        print(query)

        # Column 3 of each EXPLAIN QUERY PLAN row is the human-readable detail.
        plan_details = [
            plan_row[3] for plan_row in con.execute("EXPLAIN QUERY PLAN " + query)
        ]
        for detail in plan_details:
            print(detail)
        indexed = sum(1 for detail in plan_details if gri_constraint in detail)
        assert indexed == 2

        results = list(con.execute(query))
        assert len(results) == 5191
        # LEFT JOIN rows whose second column is NULL had no matching exons2 row.
        unmatched = [row for row in results if row[1] is None]
        assert len(unmatched) == 5

        # Control: the same join via explicit interval tests, index disabled.
        control_sql = "SELECT exons.id, exons2.id FROM exons LEFT JOIN exons2 NOT INDEXED ON NOT (exons2.end < exons.beg OR exons2.beg > exons.end) AND exons.id != exons2.id ORDER BY exons.id, exons2.id"
        control_rows = list(con.execute(control_sql))
        assert results == control_rows
def test_refseq():
    """Validate the reference-assembly SQL script and the lookup helpers.

    The script generated for ``GRCh38_no_alt_analysis_set`` must contain 195
    ``INSERT INTO`` lines and 2 other lines.  After it is executed, the by-rid
    and by-name lookups must agree with each other, and the ``chr17`` entry
    must carry the expected rid, length, assembly and refget id.  Finally, a
    parameterised range query joined through ``_gri_refseq`` must return 56
    rows for the chr17 interval 43115725-43125370.
    """
    con = sqlite3.connect(":memory:")
    create_assembly = genomicsqlite.put_reference_assembly_sql(
        con, "GRCh38_no_alt_analysis_set"
    )

    # Split the script into INSERT statements and everything else.
    script_lines = create_assembly.strip().split("\n")
    insert_lines = [text for text in script_lines if "INSERT INTO" in text]
    other_lines = [text for text in script_lines if "INSERT INTO" not in text]
    print("\n".join(insert_lines[:24]))
    assert len(insert_lines) == 195
    print("\n".join(other_lines))
    assert len(other_lines) == 2

    con.executescript(create_assembly)
    _fill_exons(con, floor=2)
    con.commit()

    refseq_by_rid = genomicsqlite.get_reference_sequences_by_rid(con)
    refseq_by_name = genomicsqlite.get_reference_sequences_by_name(con)
    for refseq in refseq_by_rid.values():
        # Both lookups must resolve to the same record.
        assert refseq_by_rid[refseq.rid] == refseq
        assert refseq_by_name[refseq.name] == refseq
        if refseq.name != "chr17":
            continue
        # Spot-check one well-known sequence in full.
        assert refseq.rid == 17
        assert refseq.length == 83257441
        assert refseq.assembly == "GRCh38_no_alt_analysis_set"
        assert refseq.refget_id == "f9a0fb01553adb183568e3eb9d8626db"
    assert len(refseq_by_rid) == 195

    # With only the table name given, the range bounds are bound as query
    # parameters at execute() time.
    rowids_sql = genomicsqlite.genomic_range_rowids_sql(con, "exons")
    query = (
        "SELECT _gri_refseq._gri_rid, rid, beg, end, id FROM exons, _gri_refseq WHERE exons.rid = gri_refseq_name AND exons._rowid_ IN "
        + rowids_sql
    )
    print("\n" + query)
    between_count = sum(1 for text in query.split("\n") if "BETWEEN" in text)
    assert between_count == 2
    hits = list(con.execute(query, ("chr17", 43115725, 43125370)))
    assert len(hits) == 56
def test_connect(tmp_path):
    """Write a database file, reopen it read-only, and run an overlap join.

    A database is created at ``tmp_path / "test.gsql"``, loaded with the
    ``GRCh38_no_alt_analysis_set`` reference assembly and the rows produced by
    ``_fill_exons``, and committed.  The file is then reopened with
    ``read_only=True`` and an overlap self-join (through a CTE copy of
    ``exons``) is expected to return 5191 rows.

    Both connections are closed explicitly.  The write connection in
    particular must be released before the file is reopened, and relying on
    ``del`` alone would tie that to interpreter-specific finalization timing.
    """
    dbfile = str(tmp_path / "test.gsql")

    # Phase 1: create and populate the database, then release the handle.
    con = genomicsqlite.connect(dbfile, unsafe_load=True)
    try:
        con.executescript(
            genomicsqlite.put_reference_assembly_sql(
                con, "GRCh38_no_alt_analysis_set"
            )
        )
        _fill_exons(con)
        con.commit()
    finally:
        con.close()
    del con

    # Phase 2: reopen read-only and query through the genomic range index.
    con = genomicsqlite.connect(dbfile, read_only=True)
    try:
        query = (
            "WITH exons2 AS (SELECT * from exons) SELECT exons.id, exons2.id FROM exons2 LEFT JOIN exons ON exons._rowid_ IN\n"
            + genomicsqlite.genomic_range_rowids_sql(
                con, "exons", "exons2.rid", "exons2.beg", "exons2.end"
            )
            + " AND exons.id != exons2.id ORDER BY exons.id, exons2.id"
        )
        results = list(con.execute(query))
    finally:
        # Close even if the query fails so the temp file is not left open.
        con.close()
    assert len(results) == 5191