def get_db_connection(filename):
    """Make a connection to an SQLite database.

    Args:
        filename: Database location, passed unchanged to
            ``sqlite3.connect``.

    Returns:
        The open ``sqlite3`` connection object.

    Raises:
        SystemExit: With status 1 if the connection cannot be made; the
            failure is logged before exiting.
    """
    try:
        conn = sqlite3.connect(filename)
    except Exception:
        # Deliberately not a bare ``except:`` -- KeyboardInterrupt and
        # SystemExit must propagate rather than being reported as a
        # failed database connection.
        logger.error("Failed to connect to SQLite db %s (exiting)", filename)
        logger.error(last_exception())
        sys.exit(1)
    return conn
def create_db_tables(conn):
    """Create the tables for the SQLite3 database.

    Each table is dropped first if it already exists, so calling this on
    a populated database discards the data held in these tables.

    Args:
        conn: Open ``sqlite3`` connection on which to create the tables.

    Raises:
        SystemExit: With status 1 if any table cannot be created; the
            failure is logged before exiting.
    """
    # SQL for each table creation
    #
    # NOTE(review): the foreign keys in ``genotypes`` and ``person_gtype``
    # reference a table named ``snp``, which this function does not create
    # (it creates snp_location, genotypes, person and person_gtype).
    # SQLite only enforces foreign keys when ``PRAGMA foreign_keys`` is
    # enabled, so this may be going unnoticed -- confirm the intended
    # parent tables.  ``person_gtype.snp_id`` is also declared INTEGER
    # while ``genotypes.snp_id`` is TEXT -- confirm which is intended.
    sql_snp = """
    DROP TABLE IF EXISTS snp_location;
    CREATE TABLE snp_location (snp_id TEXT,
                               hg_version TEXT,
                               chromosome TEXT,
                               position INTEGER,
                               PRIMARY KEY(snp_id, hg_version));
    """
    sql_gtype = """
    DROP TABLE IF EXISTS genotypes;
    CREATE TABLE genotypes (snp_id TEXT NOT NULL,
                            genotype TEXT NOT NULL,
                            PRIMARY KEY(snp_id, genotype),
                            FOREIGN KEY(snp_id) REFERENCES snp(snp_id)
                              ON DELETE CASCADE);
    """
    sql_person = """
    DROP TABLE IF EXISTS person;
    CREATE TABLE person (person_id INTEGER PRIMARY KEY AUTOINCREMENT,
                         name TEXT)
    """
    sql_person_gtype = """
    DROP TABLE IF EXISTS person_gtype;
    CREATE TABLE person_gtype (person_id INTEGER NOT NULL,
                               snp_id INTEGER NOT NULL,
                               genotype TEXT NOT NULL,
                               PRIMARY KEY(person_id, snp_id, genotype),
                               FOREIGN KEY(person_id)
                                 REFERENCES person(person_id)
                                 ON DELETE CASCADE,
                               FOREIGN KEY(snp_id, genotype)
                                 REFERENCES snp(snp_id, genotype)
                                 ON DELETE CASCADE);
    """
    # Labels match the table names created by the SQL above; they are
    # only used in log messages.
    tables = [
        ("snp_location", sql_snp),
        ("genotypes", sql_gtype),
        ("person", sql_person),
        ("person_gtype", sql_person_gtype),
    ]
    # Create each table in turn
    for tname, sql in tables:
        with conn:
            cur = conn.cursor()
            logger.info("Creating table %s", tname)
            try:
                cur.executescript(sql)
            except Exception:
                # The original message used "% (exiting)", an invalid
                # format specifier that raised ValueError here instead of
                # logging the failure.
                logger.error("Could not create table %s (exiting)", tname)
                logger.error(last_exception())
                sys.exit(1)
def create_db_views(conn):
    """Create views on the database.

    The ``snp_counts`` view is dropped if it exists and then recreated;
    it reports, for each ``snp_id`` in ``genotypes``, the number of rows
    with that id.

    Args:
        conn: Open ``sqlite3`` connection on which to create the views.

    Raises:
        SystemExit: With status 1 if a view cannot be created; the
            failure is logged before exiting.
    """
    sql_snpcount = """
    DROP VIEW IF EXISTS snp_counts;
    CREATE VIEW snp_counts AS
      SELECT snp_id, COUNT(*) as count FROM genotypes GROUP BY snp_id;
    """
    # Label matches the view name created by the SQL above; it is only
    # used in log messages.
    views = [("snp_counts", sql_snpcount)]
    # Create each view in turn
    for vname, sql in views:
        with conn:
            cur = conn.cursor()
            logger.info("Creating view %s", vname)
            try:
                cur.executescript(sql)
            except Exception:
                # The original message used "% (exiting)", an invalid
                # format specifier that raised ValueError here instead of
                # logging the failure.
                logger.error("Could not create view %s (exiting)", vname)
                logger.error(last_exception())
                sys.exit(1)
def _add_snp_row(cur, person_id, asm, row):
    """Insert one SNP-file row (rsid, chromosome, position, genotype).

    Duplicate SNP locations and duplicate genotypes are logged as
    warnings and skipped; a failure to link the person to the genotype
    is logged and terminates the program with status 1.
    """
    rsid, chrm, pos, gt = row
    pos = int(pos)

    # Add snp location
    try:
        cur.execute(
            "INSERT INTO snp_location(snp_id, hg_version, chromosome, "
            "position) VALUES (?, ?, ?, ?)",
            (rsid, asm, chrm, pos),
        )
    except sqlite3.IntegrityError:
        # Raised if the SNP location (on this HG build) is already in
        # the db
        logger.warning(
            "SNP %s position on HG assembly build %s already present",
            rsid, asm,
        )

    # Insert genotype
    try:
        cur.execute(
            "INSERT INTO genotypes(snp_id, genotype) VALUES (?, ?)",
            (rsid, gt),
        )
    except sqlite3.IntegrityError:
        # Raised if the genotype already exists in the db
        logger.warning("Genotype %s already present for SNP %s", gt, rsid)

    # Link individual to genotype
    try:
        cur.execute(
            "INSERT INTO person_gtype(person_id, snp_id, genotype) "
            "VALUES (?, ?, ?)",
            (person_id, rsid, gt),
        )
    except Exception:
        logger.error("Problem populating database at row %s (exiting)", row)
        logger.error(last_exception())
        sys.exit(1)


def populate_db(name, snpfile, asm, conn):
    """Add data for the named person from the passed snpfile.

    A new row is inserted into ``person`` for ``name``; every data row of
    the tab-separated ``snpfile`` is then added to ``snp_location``,
    ``genotypes`` and ``person_gtype``.  Rows whose first field starts
    with ``#`` are treated as comments, and blank lines are skipped.  All
    inserts are issued inside a single ``with conn:`` block.

    Args:
        name: Name stored in the ``person`` table.
        snpfile: Path to the tab-separated SNP file; each data row must
            hold exactly four fields (SNP id, chromosome, position,
            genotype).
        asm: Value stored as ``hg_version`` for every SNP location.
        conn: Open ``sqlite3`` connection to the database.

    Raises:
        SystemExit: With status 1 if the file cannot be opened or parsed,
            or if a row cannot be linked to the person; the failure is
            logged before exiting.
    """
    # Get filehandle.  The legacy 'U' mode flag was removed in Python 3.11
    # (open() raises ValueError for it); plain text mode already applies
    # universal newlines on Python 3.
    try:
        fh = open(snpfile, 'r')
    except Exception:
        logger.error("Could not open SNP file %s", snpfile)
        logger.error(last_exception())
        sys.exit(1)

    # Load data into database
    logger.info("Processing %s", snpfile)
    with fh as snpfh:
        with conn:
            cur = conn.cursor()
            # Populate the person table and keep the new row's id so the
            # genotypes can be linked to this person.
            cur.execute("INSERT INTO person(name) VALUES (?)", (name, ))
            person_id = cur.lastrowid
            # Parse the SNP file
            snpreader = csv.reader(snpfh, delimiter='\t')
            try:
                for row in snpreader:
                    # Skip blank lines (csv yields an empty list for
                    # them) and comment lines.
                    if not row or row[0].startswith('#'):
                        continue
                    _add_snp_row(cur, person_id, asm, row)
            except Exception:
                # ``except Exception`` lets the SystemExit raised by
                # _add_snp_row propagate, rather than being re-reported
                # here as a parsing problem.
                logger.error("Problem parsing SNP file %s", snpfile)
                logger.error(last_exception())
                sys.exit(1)