def amend_sample(args):
    """Update rows of the ``samples`` table from the PED file ``args.sample``.

    Every subject returned by ``get_subjects(args)`` whose key is also present
    in the loaded PED data gets one ``update samples ...`` statement, setting
    each PED field (as named by ``get_ped_fields``) to the corresponding value
    after it has been passed through ``quote_string``. Subjects without a
    matching PED entry are left untouched.

    All statements run inside a single ``database_transaction(args.db)``.
    """
    subjects = get_subjects(args)
    ped_records = load_ped_file(args.sample)
    columns = get_ped_fields(args.sample)
    update_template = "update samples set {0} where sample_id={1}"

    with database_transaction(args.db) as c:
        for subject_key, subject in subjects.items():
            if subject_key not in ped_records:
                continue
            quoted_values = map(quote_string, ped_records[subject_key])
            # Builds "col1=val1,col2=val2,..."; zip() stops at the shorter of
            # the header and the record.
            assignments = ",".join(
                str(column) + "=" + str(value)
                for column, value in zip(columns, quoted_values)
            )
            c.execute(update_template.format(assignments, subject.sample_id))
def amend_sample(args):
    """Rewrite ``samples`` rows using the values found in a PED file.

    Reads the subjects via ``get_subjects(args)``, the PED records via
    ``load_ped_file(args.sample)`` and the column names via
    ``get_ped_fields(args.sample)``. For each subject that has a PED record
    under the same key, issues an ``update samples set ... where sample_id=...``
    statement inside one ``database_transaction(args.db)`` block. Values are
    run through ``quote_string`` before being placed into the statement.
    """
    known_subjects = get_subjects(args)
    ped_by_key = load_ped_file(args.sample)
    field_names = get_ped_fields(args.sample)

    with database_transaction(args.db) as c:
        for key, subject in known_subjects.items():
            if key in ped_by_key:
                field_values = zip(field_names,
                                   map(quote_string, ped_by_key[key]))
                # One "name=value" fragment per PED field.
                fragments = [str(name) + "=" + str(value)
                             for name, value in field_values]
                statement = "update samples set {0} where sample_id={1}".format(
                    ",".join(fragments), subject.sample_id)
                c.execute(statement)
def annotate(parser, args):
    """Add annotation columns to the ``variants`` table and index them.

    Checks that ``tabix`` and ``bgzip`` are available, validates the
    arguments, dispatches on ``args.anno_type`` ("boolean", "count" or
    "extract") to the matching ``annotate_variants_*`` routine, and finally
    (re)creates an index named ``<column>idx`` for every annotated column.

    Args:
        parser: Argument parser, used to print help when ``args.db`` is None.
            The ``--region-only`` checks are only enforced when it is not None.
        args: Parsed arguments. Reads ``db``, ``anno_file``, ``anno_type``,
            ``col_names``, ``col_types``, ``col_operations``, ``col_extracts``
            and ``region_only``. For VCF annotation files in "extract" mode,
            a missing ``col_names``/``col_extracts`` is filled in from the
            other one (``args`` is mutated).

    Raises:
        SystemExit: with status 1 when the database or annotation file is
            missing, or with an explanatory message when the arguments are
            inconsistent or the annotation type is unknown.
    """
    check_dependencies("annotate", [["tabix", "-h"], ["bgzip", "-h"]])

    def _validate_args(args):
        """Validate "boolean"/"count" arguments; return the column name list."""
        if (args.col_operations or args.col_types or args.col_extracts):
            sys.exit('EXITING: You may only specify a column name (-c) when '
                     'using \"-a boolean\" or \"-a count\".\n')

        col_names = args.col_names.split(',')
        if len(col_names) > 1:
            sys.exit('EXITING: You may only specify a single column name (-c) '
                     'when using \"-a boolean\" or \"-a count\".\n')

        if (not args.anno_file.endswith(('.vcf', '.vcf.gz'))
                and args.region_only and parser is not None):
            sys.exit('EXITING: You may only specify --region-only when '
                     'annotation is a VCF.')

        return col_names

    def _validate_extract_args(args):
        """Validate "extract" arguments; return (names, types, ops, idxs)."""
        if args.anno_file.endswith(('.vcf', '.vcf.gz')):
            # For VCF input, names and extracts default to each other.
            if not args.col_names:
                args.col_names = args.col_extracts
            elif not args.col_extracts:
                args.col_extracts = args.col_names
        elif args.region_only and parser is not None:
            sys.exit('EXITING: You may only specify --region-only when '
                     'annotation is a VCF.1')

        if not args.col_types:
            sys.exit('EXITING: need to give column types ("-t")\n')

        col_ops = args.col_operations.split(',')
        col_idxs = args.col_extracts.split(',')
        col_names = args.col_names.split(',')
        col_types = args.col_types.split(',')

        supported_types = ['text', 'float', 'integer']
        for col_type in col_types:
            if col_type not in supported_types:
                sys.exit('EXITING: Column type [%s] not supported.\n' %
                         (col_type))

        supported_ops = op_funcs.keys()
        for col_op in col_ops:
            if col_op not in supported_ops:
                sys.exit('EXITING: Column operation [%s] not supported.\n' %
                         (col_op))

        if not (len(col_ops) == len(col_names) ==
                len(col_types) == len(col_idxs)):
            sys.exit('EXITING: The number of column names, numbers, types, and '
                     'operations must match: [%s], [%s], [%s], [%s]\n' %
                     (args.col_names, args.col_extracts, args.col_types,
                      args.col_operations))

        return col_names, col_types, col_ops, col_idxs

    # sys.exit() rather than the site-provided exit() builtin, which is meant
    # for interactive use and is absent when Python runs without ``site``.
    if args.db is None:
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(args.db):
        sys.stderr.write("Error: cannot find database file.")
        sys.exit(1)
    if not os.path.exists(args.anno_file):
        sys.stderr.write("Error: cannot find annotation file.")
        sys.exit(1)

    conn = sqlite3.connect(args.db)
    try:
        conn.row_factory = sqlite3.Row  # allow us to refer to columns by name
        conn.isolation_level = None

        if args.anno_type == "boolean":
            col_names = _validate_args(args)
            annotate_variants_bool(args, conn, col_names)
        elif args.anno_type == "count":
            col_names = _validate_args(args)
            annotate_variants_count(args, conn, col_names)
        elif args.anno_type == "extract":
            if (args.col_extracts is None
                    and not args.anno_file.endswith('.vcf.gz')):
                sys.exit("You must specify which column to "
                         "extract from your annotation file.")
            col_names, col_types, col_ops, col_idxs = \
                _validate_extract_args(args)
            annotate_variants_extract(args, conn, col_names, col_types,
                                      col_ops, col_idxs)
        else:
            sys.exit("Unknown column type requested. Exiting.")
    finally:
        # Validation and annotation can bail out via SystemExit or an
        # exception; release the connection on those paths too.
        conn.close()

    # index on the newly created columns
    for col_name in col_names:
        with database_transaction(args.db) as c:
            c.execute('''drop index if exists %s''' % (col_name + "idx"))
            c.execute('''create index %s on variants(%s)''' %
                      (col_name + "idx", col_name))
def annotate(parser, args):
    """Add annotation columns to the ``variants`` table and index them.

    Validates the arguments, dispatches on ``args.anno_type`` ("boolean",
    "count" or "extract") to the matching ``annotate_variants_*`` routine,
    and finally (re)creates an index named ``<column>idx`` for every
    annotated column.

    Args:
        parser: Argument parser, used to print help when ``args.db`` is None.
        args: Parsed arguments. Reads ``db``, ``anno_file``, ``anno_type``,
            ``col_names``, ``col_types``, ``col_operations`` and
            ``col_extracts``.

    Raises:
        SystemExit: with status 1 when the database or annotation file is
            missing, or with an explanatory message when the arguments are
            inconsistent or the annotation type is unknown.
    """

    def _validate_args(args):
        """Validate "boolean"/"count" arguments; return the column name list."""
        if (args.col_operations or args.col_types or args.col_extracts):
            sys.exit('EXITING: You may only specify a column name (-c) when '
                     'using \"-a boolean\" or \"-a count\".\n')

        col_names = args.col_names.split(',')
        if len(col_names) > 1:
            sys.exit('EXITING: You may only specify a single column name (-c) '
                     'when using \"-a boolean\" or \"-a count\".\n')
        return col_names

    def _validate_extract_args(args):
        """Validate "extract" arguments; return (names, types, ops, idxs)."""
        col_ops = args.col_operations.split(',')
        col_names = args.col_names.split(',')
        col_types = args.col_types.split(',')
        col_idxs = args.col_extracts.split(',')

        supported_types = ['text', 'float', 'integer']
        for col_type in col_types:
            if col_type not in supported_types:
                sys.exit('EXITING: Column type [%s] not supported.\n' %
                         (col_type))

        supported_ops = [
            'mean', 'median', 'mode', 'min', 'max', 'first', 'last', 'list',
            'uniq_list'
        ]
        for col_op in col_ops:
            if col_op not in supported_ops:
                sys.exit('EXITING: Column operation [%s] not supported.\n' %
                         (col_op))

        if not (len(col_ops) == len(col_names) ==
                len(col_types) == len(col_idxs)):
            sys.exit(
                'EXITING: The number of column names, numbers, types, and '
                'operations must match: [%s], [%s], [%s], [%s]\n' %
                (args.col_names, args.col_extracts, args.col_types,
                 args.col_operations))

        return col_names, col_types, col_ops, col_idxs

    # sys.exit() rather than the site-provided exit() builtin, which is meant
    # for interactive use and is absent when Python runs without ``site``.
    if args.db is None:
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(args.db):
        sys.stderr.write("Error: cannot find database file.")
        sys.exit(1)
    if not os.path.exists(args.anno_file):
        sys.stderr.write("Error: cannot find annotation file.")
        sys.exit(1)

    conn = sqlite3.connect(args.db)
    try:
        conn.row_factory = sqlite3.Row  # allow us to refer to columns by name
        conn.isolation_level = None

        if args.anno_type == "boolean":
            col_names = _validate_args(args)
            annotate_variants_bool(args, conn, col_names)
        elif args.anno_type == "count":
            col_names = _validate_args(args)
            annotate_variants_count(args, conn, col_names)
        elif args.anno_type == "extract":
            if args.col_extracts is None:
                sys.exit("You must specify which column to "
                         "extract from your annotation file.")
            col_names, col_types, col_ops, col_idxs = \
                _validate_extract_args(args)
            annotate_variants_extract(args, conn, col_names, col_types,
                                      col_ops, col_idxs)
        else:
            sys.exit("Unknown column type requested. Exiting.")
    finally:
        # Validation and annotation can bail out via SystemExit or an
        # exception; release the connection on those paths too.
        conn.close()

    # index on the newly created columns
    for col_name in col_names:
        with database_transaction(args.db) as c:
            # CREATE INDEX fails if the index already exists, so drop any
            # index left over from a previous annotation of the same column.
            c.execute('''drop index if exists %s''' % (col_name + "idx"))
            c.execute('''create index %s on variants(%s)''' %
                      (col_name + "idx", col_name))
def annotate(parser, args):
    """Add annotation columns to the ``variants`` table and index them.

    Checks that ``tabix`` and ``bgzip`` are available, validates the
    arguments, dispatches on ``args.anno_type`` ("boolean", "count" or
    "extract") to the matching ``annotate_variants_*`` routine, and finally
    (re)creates an index named ``<column>idx`` for every annotated column.

    Args:
        parser: Argument parser, used to print help when ``args.db`` is None.
            The ``--region-only`` checks are only enforced when it is not None.
        args: Parsed arguments. Reads ``db``, ``anno_file``, ``anno_type``,
            ``col_names``, ``col_types``, ``col_operations``, ``col_extracts``
            and ``region_only``. For VCF annotation files in "extract" mode,
            a missing ``col_names``/``col_extracts`` is filled in from the
            other one (``args`` is mutated).

    Raises:
        SystemExit: with status 1 when ``args.db`` is None or the annotation
            file does not exist.
        ValueError: when the column arguments are inconsistent.
        RuntimeError: when no extract column is given for a non-``.vcf.gz``
            file, or ``args.anno_type`` is not a known annotation type.
    """
    check_dependencies("annotate", [["tabix", "-h"], ["bgzip", "-h"]])

    def _validate_args(args):
        """Validate "boolean"/"count" arguments; return the column name list."""
        if (args.col_operations or args.col_types or args.col_extracts):
            raise ValueError(
                'You must not specify a column type (-t), op (-o) or extract (-e) when '
                'using \"-a boolean\" or \"-a count\".\n')

        col_names = args.col_names.split(',')
        if len(col_names) > 1:
            raise ValueError('You may only specify a single column name (-c) '
                             'when using \"-a boolean\" or \"-a count\".\n')

        if (not args.anno_file.endswith(('.vcf', '.vcf.gz'))
                and args.region_only and parser is not None):
            raise ValueError(
                'You may only specify --region-only when annotation is a VCF.')

        return col_names

    def _validate_extract_args(args):
        """Validate "extract" arguments; return (names, types, ops, idxs)."""
        if args.anno_file.endswith(('.vcf', '.vcf.gz')):
            # For VCF input, names and extracts default to each other.
            if not args.col_names:
                args.col_names = args.col_extracts
            elif not args.col_extracts:
                args.col_extracts = args.col_names
        elif args.region_only and parser is not None:
            raise ValueError(
                'You may only specify --region-only when annotation is a VCF.1'
            )

        if not args.col_types:
            raise ValueError('need to give column types ("-t")\n')

        col_ops = args.col_operations.split(',')
        col_idxs = args.col_extracts.split(',')
        col_names = args.col_names.split(',')
        col_types = args.col_types.split(',')

        supported_types = ['text', 'float', 'integer']
        for col_type in col_types:
            if col_type not in supported_types:
                raise ValueError('Column type [%s] not supported.\n' %
                                 (col_type))

        supported_ops = op_funcs.keys()
        for col_op in col_ops:
            if col_op not in supported_ops:
                raise ValueError('Column operation [%s] not supported.\n' %
                                 (col_op))

        if not (len(col_ops) == len(col_names) ==
                len(col_types) == len(col_idxs)):
            raise ValueError(
                'The number of column names, numbers, types, and '
                'operations must match: [%s], [%s], [%s], [%s]\n' %
                (args.col_names, args.col_extracts, args.col_types,
                 args.col_operations))

        return col_names, col_types, col_ops, col_idxs

    # sys.exit() rather than the site-provided exit() builtin, which is meant
    # for interactive use and is absent when Python runs without ``site``.
    if args.db is None:
        parser.print_help()
        sys.exit(1)
    if not os.path.exists(args.anno_file):
        sys.stderr.write("Error: cannot find annotation file.")
        sys.exit(1)

    conn, metadata = database.get_session_metadata(args.db)
    try:
        if args.anno_type == "boolean":
            col_names = _validate_args(args)
            annotate_variants_bool(args, conn, metadata, col_names)
        elif args.anno_type == "count":
            col_names = _validate_args(args)
            annotate_variants_count(args, conn, metadata, col_names)
        elif args.anno_type == "extract":
            if (args.col_extracts is None
                    and not args.anno_file.endswith('.vcf.gz')):
                raise RuntimeError("You must specify which column to "
                                   "extract from your annotation file.")
            col_names, col_types, col_ops, col_idxs = \
                _validate_extract_args(args)
            annotate_variants_extract(args, conn, metadata, col_names,
                                      col_types, col_ops, col_idxs)
        else:
            raise RuntimeError("Unknown column type requested. Exiting.")
    finally:
        # Validation and annotation report problems by raising; make sure
        # the connection is released on those paths as well.
        conn.close()

    # index on the newly created columns
    for col_name in col_names:
        with database_transaction(args.db) as c:
            c.execute('''drop index if exists %s''' % (col_name + "idx"))
            c.execute('''create index %s on variants(%s)''' %
                      (col_name + "idx", col_name))
def annotate(parser, args):
    """Annotate the ``variants`` table from an annotation file, then index.

    After validating the arguments, calls the ``annotate_variants_*`` routine
    selected by ``args.anno_type`` ("boolean", "count" or "extract") and then
    (re)creates an index named ``<column>idx`` on each annotated column.

    Args:
        parser: Argument parser, used to print help when ``args.db`` is None.
        args: Parsed arguments. Reads ``db``, ``anno_file``, ``anno_type``,
            ``col_names``, ``col_types``, ``col_operations`` and
            ``col_extracts``.

    Raises:
        SystemExit: with status 1 when the database or annotation file is
            missing, or with an explanatory message when the arguments are
            inconsistent or the annotation type is unknown.
    """

    def _validate_args(args):
        """Check "boolean"/"count" arguments and return the column names."""
        if (args.col_operations or args.col_types or args.col_extracts):
            sys.exit('EXITING: You may only specify a column name (-c) when '
                     'using \"-a boolean\" or \"-a count\".\n')

        col_names = args.col_names.split(',')
        if len(col_names) > 1:
            sys.exit('EXITING: You may only specify a single column name (-c) '
                     'when using \"-a boolean\" or \"-a count\".\n')
        return col_names

    def _validate_extract_args(args):
        """Check "extract" arguments and return (names, types, ops, idxs)."""
        col_ops = args.col_operations.split(',')
        col_names = args.col_names.split(',')
        col_types = args.col_types.split(',')
        col_idxs = args.col_extracts.split(',')

        supported_types = ['text', 'float', 'integer']
        for col_type in col_types:
            if col_type not in supported_types:
                sys.exit('EXITING: Column type [%s] not supported.\n' %
                         (col_type))

        supported_ops = ['mean', 'median', 'mode', 'min', 'max', 'first',
                         'last', 'list', 'uniq_list']
        for col_op in col_ops:
            if col_op not in supported_ops:
                sys.exit('EXITING: Column operation [%s] not supported.\n' %
                         (col_op))

        if not (len(col_ops) == len(col_names) ==
                len(col_types) == len(col_idxs)):
            sys.exit('EXITING: The number of column names, numbers, types, and '
                     'operations must match: [%s], [%s], [%s], [%s]\n' %
                     (args.col_names, args.col_extracts, args.col_types,
                      args.col_operations))

        return col_names, col_types, col_ops, col_idxs

    # Use sys.exit(): the bare exit() builtin is injected by ``site`` for
    # interactive sessions and is not guaranteed to exist.
    if args.db is None:
        parser.print_help()
        sys.exit(1)

    if not os.path.exists(args.db):
        sys.stderr.write("Error: cannot find database file.")
        sys.exit(1)
    if not os.path.exists(args.anno_file):
        sys.stderr.write("Error: cannot find annotation file.")
        sys.exit(1)

    conn = sqlite3.connect(args.db)
    try:
        conn.row_factory = sqlite3.Row  # allow us to refer to columns by name
        conn.isolation_level = None

        if args.anno_type == "boolean":
            col_names = _validate_args(args)
            annotate_variants_bool(args, conn, col_names)
        elif args.anno_type == "count":
            col_names = _validate_args(args)
            annotate_variants_count(args, conn, col_names)
        elif args.anno_type == "extract":
            if args.col_extracts is None:
                sys.exit("You must specify which column to "
                         "extract from your annotation file.")
            col_names, col_types, col_ops, col_idxs = \
                _validate_extract_args(args)
            annotate_variants_extract(args, conn, col_names, col_types,
                                      col_ops, col_idxs)
        else:
            sys.exit("Unknown column type requested. Exiting.")
    finally:
        # The branches above may leave via SystemExit or an exception;
        # close the connection on every path, not only on success.
        conn.close()

    # index on the newly created columns
    for col_name in col_names:
        with database_transaction(args.db) as c:
            # Dropping first keeps a repeated annotation of the same column
            # from failing on CREATE INDEX with "index already exists".
            c.execute('''drop index if exists %s''' % (col_name + "idx"))
            c.execute('''create index %s on variants(%s)''' %
                      (col_name + "idx", col_name))