def get_region(c, args):
    """Print the variants that start or end inside a genomic region.

    The region is taken from ``args.region`` and must contain a
    ``chrom:start-end`` pattern.  Matching rows of the ``variants`` table
    are printed one per line, ordered by chromosome and start position.
    Fields are joined with ``args.separator`` and NULL (``None``) values
    are written as ".".  Only the columns whose indices are returned by
    ``util.get_col_names_and_indices(..., ignore_gt_cols=True)`` are
    reported.  When ``args.use_header`` is true, a header line with the
    returned column names is printed first.

    Exits through ``sys.exit`` with an error message when ``args.region``
    does not contain a ``chrom:start-end`` pattern.
    """
    # The first chrom:start-end occurrence wins.  The pattern has exactly
    # three groups, so a successful match always yields three values.
    region_match = re.search(r"(\S+):(\d+)-(\d+)", args.region)
    if region_match is None:
        sys.exit("Malformed region (--reg) string")
    chrom, start, end = region_match.groups()

    # Bind the user-supplied values instead of splicing them into the SQL
    # text, so a chromosome name containing a quote can neither break nor
    # alter the statement.
    # Assumes the cursor uses the "qmark" paramstyle (as sqlite3 does) --
    # TODO confirm against the code that creates ``c``.
    # NOTE(review): a variant that starts before the region and ends after
    # it satisfies neither BETWEEN clause and is not reported -- confirm
    # whether that is intended.
    query = ("SELECT * "
             "FROM variants v "
             "WHERE v.chrom = ? "
             "AND ((v.start BETWEEN ? AND ?) "
             "OR (v.end BETWEEN ? AND ?)) "
             "ORDER BY chrom, start")
    start = int(start)
    end = int(end)
    c.execute(query, (chrom, start, end, start, end))

    # build a list of all the column indices that are NOT
    # gt_* columns.  These will be the columns reported
    (col_names, non_gt_idxs) = \
        util.get_col_names_and_indices(c.description, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(col_names))

    for row in c:
        print(args.separator.join(
            "." if row[i] is None else str(row[i]) for i in non_gt_idxs))
def get_genotypes(conn, metadata, args):
    """For each variant, report each sample's genotype on a separate line.

    Every output line holds the selected variant columns followed by the
    entry of ``idx_to_sample`` for the genotype's position and the
    genotype itself, all joined with ``args.separator``.  When
    ``args.use_header`` is true, a header line naming those same fields
    is printed first.
    """
    idx_to_sample = util.map_indices_to_samples(metadata)

    # The variant columns reported on every line, in output order.  The
    # query and the header are both derived from this one tuple so they
    # cannot drift apart: building the header from the whole ``variants``
    # table would name columns that the query below never selects.
    variant_cols = ("chrom", "start", "end",
                    "ref", "alt",
                    "type", "sub_type",
                    "aaf", "in_dbsnp", "gene")
    query = ("SELECT " + ", ".join("v." + col for col in variant_cols) +
             ", v.gts FROM variants v ORDER BY chrom, start")
    res = conn.execute(sql.text(query))

    if args.use_header:
        print(args.separator.join(variant_cols + ('sample', 'genotype')))

    n_variant_cols = len(variant_cols)
    for row in res:
        gts = Z.unpack_genotype_blob(row['gts'])
        # Everything except the trailing v.gts blob.  This prefix is the
        # same for every sample of the variant, so build it only once.
        variant_fields = args.separator.join(
            str(row[i]) for i in xrange(n_variant_cols))
        for idx, gt in enumerate(gts):
            print(args.separator.join(
                (variant_fields, idx_to_sample[idx], gt)))
def get_genotypes(c, args):
    """For each variant, report each sample's genotype on a separate line.

    Every output line holds the selected variant columns followed by the
    entry of ``idx_to_sample`` for the genotype's position and the
    genotype itself, all joined with ``args.separator``.  When
    ``args.use_header`` is true, a header line is printed first.
    """
    idx_to_sample = util.map_indicies_to_samples(c)

    query = "SELECT v.chrom, v.start, v.end, \
                    v.ref, v.alt, \
                    v.type, v.sub_type, \
                    v.aaf, v.in_dbsnp, v.gene, \
                    v.gts \
             FROM variants v \
             ORDER BY chrom, start"
    c.execute(query)

    # The header names are derived from the description of the query that
    # was just executed; gt_* columns are left out by the helper.
    (col_names, _) = \
        util.get_col_names_and_indices(c.description, ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    if args.use_header:
        print(args.separator.join(col_names))

    for row in c:
        # NOTE(review): unpickling executes whatever the blob encodes, so
        # this is only safe for databases from a trusted source.
        gts = np.array(cPickle.loads(zlib.decompress(row['gts'])))
        # Everything except the trailing v.gts blob.  This prefix is the
        # same for every sample of the variant, so build it only once.
        variant_fields = args.separator.join(
            str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            # Join the variant fields to the sample/genotype pair with the
            # configured separator; two chained print statements would put
            # a literal space there regardless of args.separator.
            print(args.separator.join(
                [variant_fields, idx_to_sample[idx], gt]))
def get_genotypes(c, args):
    """For each variant, report each sample's genotype on a separate line.

    Every output line holds the selected variant columns followed by the
    entry of ``idx_to_sample`` for the genotype's position and the
    genotype itself, all joined with ``args.separator``.  When
    ``args.use_header`` is true, a header line is printed first.
    """
    idx_to_sample = util.map_indices_to_samples(c)

    query = "SELECT v.chrom, v.start, v.end, \
                    v.ref, v.alt, \
                    v.type, v.sub_type, \
                    v.aaf, v.in_dbsnp, v.gene, \
                    v.gts \
             FROM variants v \
             ORDER BY chrom, start"
    c.execute(query)

    # The header names are derived from the description of the query that
    # was just executed; gt_* columns are left out by the helper.
    (col_names, _) = \
        util.get_col_names_and_indices(c.description, ignore_gt_cols=True)
    col_names.append('sample')
    col_names.append('genotype')

    if args.use_header:
        print(args.separator.join(col_names))

    separator = args.separator
    for row in c:
        # NOTE(review): unpickling executes whatever the blob encodes, so
        # this is only safe for databases from a trusted source.
        gts = np.array(cPickle.loads(zlib.decompress(row['gts'])))
        # xrange(len(row)-1) to avoid printing v.gts.  The prefix does not
        # depend on the sample, so it is computed once per variant.
        variant_fields = separator.join(
            str(row[i]) for i in xrange(len(row) - 1))
        for idx, gt in enumerate(gts):
            # Use the configured separator between the variant fields and
            # the sample name as well; chaining two print statements with
            # a trailing comma would emit a plain space at that position.
            print(separator.join([variant_fields, idx_to_sample[idx], gt]))
def get_samples(c, args):
    """Print every row of the ``samples`` table, one line per row.

    Fields are joined with ``args.separator`` and NULL (``None``) values
    are written as ".".  When ``args.use_header`` is true, the names
    returned by ``util.get_col_names_and_indices`` are printed first as a
    header line.
    """
    c.execute("SELECT * FROM samples")
    # Only the names are needed here; every column of the row is printed.
    col_names, _ = util.get_col_names_and_indices(c.description)

    if args.use_header:
        print(args.separator.join(col_names))

    for record in c:
        fields = ["." if value is None else str(value) for value in record]
        print(args.separator.join(fields))
def get_samples(c, args):
    """Dump the whole ``samples`` table to standard output.

    One line is written per row, with fields joined by
    ``args.separator``; a NULL (``None``) field is shown as ".".  A header
    line built from the names returned by
    ``util.get_col_names_and_indices`` precedes the rows when
    ``args.use_header`` is true.
    """
    def _show(value):
        # NULLs are rendered as a single dot, everything else via str().
        if value is None:
            return "."
        return str(value)

    sample_query = "SELECT * FROM samples"
    c.execute(sample_query)
    header_names = util.get_col_names_and_indices(c.description)[0]

    if args.use_header:
        print(args.separator.join(header_names))

    for record in c:
        print(args.separator.join(_show(value) for value in record))
def get_samples(conn, metadata, args):
    """Print every row of the ``samples`` table, one line per row.

    Fields are joined with ``args.separator`` and NULL (``None``) values
    are written as ".".  When ``args.use_header`` is true, the names
    returned by ``util.get_col_names_and_indices`` for
    ``metadata.tables["samples"]`` are printed first as a header line.
    """
    res = conn.execute("SELECT * FROM samples")
    # Only the names are needed here; every column of the row is printed.
    col_names, _ = util.get_col_names_and_indices(metadata.tables["samples"])

    if args.use_header:
        print(args.separator.join(col_names))

    for record in res:
        fields = ["." if value is None else str(value) for value in record]
        print(args.separator.join(fields))
def get_samples(conn, metadata, args):
    """Dump the whole ``samples`` table to standard output.

    One line is written per row, with fields joined by
    ``args.separator``; a NULL (``None``) field is shown as ".".  A header
    line built from the names returned by
    ``util.get_col_names_and_indices`` for ``metadata.tables["samples"]``
    precedes the rows when ``args.use_header`` is true.
    """
    def _show(value):
        # NULLs are rendered as a single dot, everything else via str().
        if value is None:
            return "."
        return str(value)

    sample_query = "SELECT * FROM samples"
    res = conn.execute(sample_query)
    samples_table = metadata.tables["samples"]
    header_names = util.get_col_names_and_indices(samples_table)[0]

    if args.use_header:
        print(args.separator.join(header_names))

    for record in res:
        print(args.separator.join(_show(value) for value in record))
def get_variants(conn, metadata, args):
    """Print the rows of the ``variants`` table without genotype columns.

    Rows are ordered by chromosome and start position.  Only the columns
    whose indices are returned by ``util.get_col_names_and_indices(...,
    ignore_gt_cols=True)`` are written, joined with ``args.separator``.
    When ``args.use_header`` is true, the returned column names are
    printed first as a header line.
    """
    def _as_text(value):
        # NULL -> ".", unicode -> UTF-8 encoded bytes, anything else -> str()
        if value is None:
            return '.'
        if type(value) is unicode:
            return value.encode('utf-8')
        return str(value)

    query = "SELECT * FROM variants \
             ORDER BY chrom, start"
    res = conn.execute(query)

    # The helper returns the header names together with the row positions
    # of the columns that should be reported.
    col_names, non_gt_idxs = util.get_col_names_and_indices(
        metadata.tables["variants"], ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(col_names))

    for record in res:
        print(args.separator.join(_as_text(record[i]) for i in non_gt_idxs))
def get_variants(conn, metadata, args):
    """Report all columns of the variant table except the genotype vectors.

    One line is written per variant, ordered by chromosome and start
    position, with fields joined by ``args.separator``.  The reported
    columns are the ones whose indices are returned by
    ``util.get_col_names_and_indices(..., ignore_gt_cols=True)``; their
    names form the optional header line (``args.use_header``).
    """
    query = "SELECT * FROM variants \
             ORDER BY chrom, start"
    res = conn.execute(query)

    variants_table = metadata.tables["variants"]
    header_names, kept_idxs = util.get_col_names_and_indices(
        variants_table, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(header_names))

    for record in res:
        fields = []
        for i in kept_idxs:
            value = record[i]
            if value is None:
                # NULL values are shown as a single dot
                fields.append('.')
            elif type(value) is unicode:
                # unicode text is written as UTF-8 encoded bytes
                fields.append(value.encode('utf-8'))
            else:
                fields.append(str(value))
        print(args.separator.join(fields))
def get_variants(c, args):
    """Print the rows of the ``variants`` table without genotype columns.

    Rows are ordered by chromosome and start position.  Only the columns
    whose indices are returned by ``util.get_col_names_and_indices(...,
    ignore_gt_cols=True)`` are written, joined with ``args.separator``;
    NULL (``None``) values appear as ".".  When ``args.use_header`` is
    true, the returned column names are printed first as a header line.
    """
    query = "SELECT * FROM variants \
             ORDER BY chrom, start"
    c.execute(query)

    # The helper returns the header names together with the row positions
    # of the columns that should be reported.
    col_names, non_gt_idxs = util.get_col_names_and_indices(
        c.description, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(col_names))

    for record in c:
        fields = []
        for i in non_gt_idxs:
            value = record[i]
            fields.append("." if value is None else str(value))
        print(args.separator.join(fields))
def get_variants(c, args):
    """Report all columns of the variant table except the genotype vectors.

    One line is written per variant, ordered by chromosome and start
    position, with fields joined by ``args.separator`` and NULL
    (``None``) values shown as ".".  The reported columns are the ones
    whose indices are returned by ``util.get_col_names_and_indices(...,
    ignore_gt_cols=True)``; their names form the optional header line
    (``args.use_header``).
    """
    def _show(value):
        # NULLs are rendered as a single dot, everything else via str().
        if value is None:
            return "."
        return str(value)

    query = "SELECT * FROM variants \
             ORDER BY chrom, start"
    c.execute(query)

    header_names, kept_idxs = util.get_col_names_and_indices(
        c.description, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(header_names))

    for record in c:
        print(args.separator.join(_show(record[i]) for i in kept_idxs))
def get_genotypes(conn, metadata, args):
    """For each variant, report each sample's genotype on a separate line.

    Every output line holds the selected variant columns followed by the
    entry of ``idx_to_sample`` for the genotype's position and the
    genotype itself, all joined with ``args.separator``.  When
    ``args.use_header`` is true, a header line naming those same fields
    is printed first.

    Genotype blobs are first decoded with ``Z.unpack_genotype_blob``; once
    that raises ``zlib.error`` the function switches to
    ``Z.snappy_unpack_blob`` for that row and all following rows.
    """
    import zlib

    idx_to_sample = util.map_indices_to_samples(metadata)

    # The variant columns reported on every line, in output order.  The
    # query and the header are both derived from this one tuple so they
    # cannot drift apart: building the header from the whole ``variants``
    # table would name columns that the query below never selects.
    variant_cols = ("chrom", "start", "end",
                    "ref", "alt",
                    "type", "sub_type",
                    "aaf", "in_dbsnp", "gene")
    query = ("SELECT " + ", ".join("v." + col for col in variant_cols) +
             ", v.gts FROM variants v ORDER BY chrom, start")
    res = conn.execute(sql.text(query))

    if args.use_header:
        print(args.separator.join(variant_cols + ('sample', 'genotype')))

    unpack = Z.unpack_genotype_blob
    n_variant_cols = len(variant_cols)
    for row in res:
        try:
            gts = unpack(row['gts'])
        except zlib.error:
            # Not decodable by the current unpacker: retry with the snappy
            # unpacker and keep using it for the remaining rows.
            unpack = Z.snappy_unpack_blob
            gts = unpack(row['gts'])

        # Everything except the trailing v.gts blob.  This prefix is the
        # same for every sample of the variant, so build it only once.
        variant_fields = args.separator.join(
            str(row[i]) for i in xrange(n_variant_cols))
        for idx, gt in enumerate(gts):
            print(args.separator.join(
                (variant_fields, idx_to_sample[idx], gt)))
def get_gene(c, args):
    """Report all variants in a specific gene.

    Rows of the ``variants`` table whose ``gene`` column equals
    ``args.gene`` are printed one per line, ordered by chromosome and
    start position.  Fields are joined with ``args.separator`` and NULL
    (``None``) values are written as ".".  Only the columns whose indices
    are returned by ``util.get_col_names_and_indices(...,
    ignore_gt_cols=True)`` are reported.  When ``args.use_header`` is
    true, the returned column names are printed first as a header line.
    """
    # Bind the gene name instead of splicing it into the SQL text, so a
    # name containing a quote can neither break nor alter the statement.
    # Assumes the cursor uses the "qmark" paramstyle (as sqlite3 does) --
    # TODO confirm against the code that creates ``c``.
    query = ("SELECT * "
             "FROM variants v "
             "WHERE v.gene = ? "
             "ORDER BY chrom, start")
    c.execute(query, (args.gene,))

    # build a list of all the column indices that are NOT
    # gt_* columns.  These will be the columns reported
    (col_names, non_gt_idxs) = \
        util.get_col_names_and_indices(c.description, ignore_gt_cols=True)

    if args.use_header:
        print(args.separator.join(col_names))

    for row in c:
        print(args.separator.join(
            "." if row[i] is None else str(row[i]) for i in non_gt_idxs))