Пример #1
0
def amend_sample(args):
    """Rewrite rows of the ``samples`` table from the PED file ``args.sample``.

    Subjects come from ``get_subjects(args)``; the PED rows and the column
    names both come from ``args.sample``.  For every subject whose key also
    appears in the PED data, one ``update samples set ...`` statement is
    executed inside a single ``database_transaction(args.db)`` block.

    NOTE(review): the statement is assembled by string formatting, so the
    values are only as safe as ``quote_string`` makes them -- confirm the
    PED file is trusted input.
    """
    subjects = get_subjects(args)
    ped_rows = load_ped_file(args.sample)
    columns = get_ped_fields(args.sample)
    template = "update samples set {0} where sample_id={1}"

    with database_transaction(args.db) as c:
        for key, subject in subjects.items():
            if key not in ped_rows:
                # No PED entry for this subject: nothing to amend.
                continue
            quoted_values = map(quote_string, ped_rows[key])
            assignments = []
            for column, value in zip(columns, quoted_values):
                assignments.append(str(column) + "=" + str(value))
            c.execute(template.format(",".join(assignments),
                                      subject.sample_id))
Пример #2
0
def amend_sample(args):
    """Update the ``samples`` table with the contents of a PED file.

    The PED file path is ``args.sample``; it supplies both the per-subject
    values (``load_ped_file``) and the column names (``get_ped_fields``).
    Only subjects returned by ``get_subjects(args)`` that also have a PED
    entry are updated, each with its own ``update`` statement keyed on the
    subject's ``sample_id``.  All statements run inside one
    ``database_transaction(args.db)`` context.

    NOTE(review): SQL text is built with ``str.format``; escaping is
    delegated entirely to ``quote_string`` -- verify that is sufficient.
    """
    known_subjects = get_subjects(args)
    ped_by_subject = load_ped_file(args.sample)
    field_names = get_ped_fields(args.sample)

    def _set_clause(values):
        # Pair each column with its quoted value: "col1=val1,col2=val2,...".
        pairs = zip(field_names, map(quote_string, values))
        return ",".join(["=".join((str(name), str(val)))
                         for name, val in pairs])

    with database_transaction(args.db) as c:
        for subject_key, subject in known_subjects.items():
            if subject_key in ped_by_subject:
                statement = "update samples set {0} where sample_id={1}".format(
                    _set_clause(ped_by_subject[subject_key]),
                    subject.sample_id)
                c.execute(statement)
Пример #3
0
def annotate(parser, args):
    """Annotate the ``variants`` table from ``args.anno_file`` and index the new columns.

    Dispatches on ``args.anno_type``:

    * ``"boolean"`` -> ``annotate_variants_bool``
    * ``"count"``   -> ``annotate_variants_count``
    * ``"extract"`` -> ``annotate_variants_extract``

    After the annotation step, an index named ``<column>idx`` is (re)created
    on ``variants`` for every annotated column.

    Args:
        parser: argument parser used to print help when no database is
            given.  May be ``None`` (the validators below treat ``None``
            as a legal value).
        args: parsed options; reads ``db``, ``anno_file``, ``anno_type``,
            ``col_names``, ``col_types``, ``col_operations``,
            ``col_extracts`` and ``region_only``.

    Exits via ``sys.exit`` on any invalid or missing option.
    """
    check_dependencies("annotate", [["tabix", "-h"],
                                    ["bgzip", "-h"]])

    def _validate_args(args):
        """Check the options for "-a boolean" / "-a count"; return the column-name list."""
        if (args.col_operations or args.col_types or args.col_extracts):
            sys.exit('EXITING: You may only specify a column name (-c) when '
                     'using \"-a boolean\" or \"-a count\".\n')

        # Without this guard a missing -c crashes with AttributeError on None.split.
        if not args.col_names:
            sys.exit('EXITING: You must specify a column name (-c) when '
                     'using \"-a boolean\" or \"-a count\".\n')

        col_names = args.col_names.split(',')
        if len(col_names) > 1:
            sys.exit('EXITING: You may only specify a single column name (-c) '
                     'when using \"-a boolean\" or \"-a count\".\n')

        if not args.anno_file.endswith(('.vcf', '.vcf.gz')) and args.region_only and parser is not None:
            sys.exit('EXITING: You may only specify --region-only when annotation is a VCF.')

        return col_names

    def _validate_extract_args(args):
        """Check the options for "-a extract"; return (names, types, ops, idxs) lists."""
        if args.anno_file.endswith(('.vcf', '.vcf.gz')):
            # For VCF annotations, names and extracts default to one another.
            if not args.col_names:
                args.col_names = args.col_extracts
            elif not args.col_extracts:
                args.col_extracts = args.col_names
        elif args.region_only and parser is not None:
            # NOTE(review): the trailing "1" looks like a leftover marker that
            # distinguishes this exit from the one in _validate_args -- confirm
            # before changing the message.
            sys.exit('EXITING: You may only specify --region-only when annotation is a VCF.1')

        if not args.col_types:
            sys.exit('EXITING: need to give column types ("-t")\n')
        # Fail with a message instead of AttributeError on None.split below.
        if not args.col_operations:
            sys.exit('EXITING: need to give column operations ("-o")\n')
        if not args.col_extracts:
            sys.exit('EXITING: need to give columns to extract ("-e")\n')
        if not args.col_names:
            sys.exit('EXITING: need to give column names ("-c")\n')

        col_ops = args.col_operations.split(',')
        col_idxs = args.col_extracts.split(',')

        col_names = args.col_names.split(',')
        col_types = args.col_types.split(',')

        supported_types = ['text', 'float', 'integer']
        for col_type in col_types:
            if col_type not in supported_types:
                sys.exit('EXITING: Column type [%s] not supported.\n' %
                         (col_type))

        supported_ops = op_funcs.keys()

        for col_op in col_ops:
            if col_op not in supported_ops:
                sys.exit('EXITING: Column operation [%s] not supported.\n' %
                         (col_op))

        if not (len(col_ops) == len(col_names) ==
                len(col_types) == len(col_idxs)):
            sys.exit('EXITING: The number of column names, numbers, types, and '
                     'operations must match: [%s], [%s], [%s], [%s]\n' %
                     (args.col_names, args.col_extracts, args.col_types, args.col_operations))

        return col_names, col_types, col_ops, col_idxs

    if args.db is None:
        if parser is not None:
            parser.print_help()
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    if not os.path.exists(args.db):
        sys.stderr.write("Error: cannot find database file.")
        sys.exit(1)
    if not os.path.exists(args.anno_file):
        sys.stderr.write("Error: cannot find annotation file.")
        sys.exit(1)

    conn = sqlite3.connect(args.db)
    conn.row_factory = sqlite3.Row  # allow us to refer to columns by name
    conn.isolation_level = None

    # try/finally so the connection is released even when validation exits
    # or an annotator raises.
    try:
        if args.anno_type == "boolean":
            col_names = _validate_args(args)
            annotate_variants_bool(args, conn, col_names)
        elif args.anno_type == "count":
            col_names = _validate_args(args)
            annotate_variants_count(args, conn, col_names)
        elif args.anno_type == "extract":
            if args.col_extracts is None and not args.anno_file.endswith('.vcf.gz'):
                sys.exit("You must specify which column to "
                         "extract from your annotation file.")
            col_names, col_types, col_ops, col_idxs = _validate_extract_args(args)
            annotate_variants_extract(args, conn, col_names, col_types, col_ops, col_idxs)
        else:
            sys.exit("Unknown column type requested. Exiting.")
    finally:
        conn.close()

    # index on the newly created columns
    # (identifiers cannot be bound as SQL parameters, hence the formatting)
    for col_name in col_names:
        index_name = col_name + "idx"
        with database_transaction(args.db) as c:
            c.execute('''drop index if exists %s''' % (index_name))
            c.execute('''create index %s on variants(%s)''' % (index_name, col_name))
Пример #4
0
def annotate(parser, args):
    """Annotate the ``variants`` table from ``args.anno_file`` and index the new columns.

    Dispatches on ``args.anno_type``:

    * ``"boolean"`` -> ``annotate_variants_bool``
    * ``"count"``   -> ``annotate_variants_count``
    * ``"extract"`` -> ``annotate_variants_extract``

    After the annotation step, an index named ``<column>idx`` is (re)created
    on ``variants`` for every annotated column.

    Args:
        parser: argument parser used to print help when no database is
            given; ``None`` is tolerated.
        args: parsed options; reads ``db``, ``anno_file``, ``anno_type``,
            ``col_names``, ``col_types``, ``col_operations`` and
            ``col_extracts``.

    Exits via ``sys.exit`` on any invalid or missing option.
    """

    def _validate_args(args):
        """Check the options for "-a boolean" / "-a count"; return the column-name list."""
        if (args.col_operations or args.col_types or args.col_extracts):
            sys.exit('EXITING: You may only specify a column name (-c) when '
                     'using \"-a boolean\" or \"-a count\".\n')

        # Without this guard a missing -c crashes with AttributeError on None.split.
        if not args.col_names:
            sys.exit('EXITING: You must specify a column name (-c) when '
                     'using \"-a boolean\" or \"-a count\".\n')

        col_names = args.col_names.split(',')
        if len(col_names) > 1:
            sys.exit('EXITING: You may only specify a single column name (-c) '
                     'when using \"-a boolean\" or \"-a count\".\n')
        return col_names

    def _validate_extract_args(args):
        """Check the options for "-a extract"; return (names, types, ops, idxs) lists."""
        # Fail with a message instead of AttributeError on None.split below.
        if not args.col_types:
            sys.exit('EXITING: need to give column types ("-t")\n')
        if not args.col_operations:
            sys.exit('EXITING: need to give column operations ("-o")\n')
        if not args.col_names:
            sys.exit('EXITING: need to give column names ("-c")\n')

        col_ops = args.col_operations.split(',')
        col_names = args.col_names.split(',')
        col_types = args.col_types.split(',')
        col_idxs = args.col_extracts.split(',')

        supported_types = ['text', 'float', 'integer']
        for col_type in col_types:
            if col_type not in supported_types:
                sys.exit('EXITING: Column type [%s] not supported.\n' %
                         (col_type))

        supported_ops = [
            'mean', 'median', 'mode', 'min', 'max', 'first', 'last', 'list',
            'uniq_list'
        ]
        for col_op in col_ops:
            if col_op not in supported_ops:
                sys.exit('EXITING: Column operation [%s] not supported.\n' %
                         (col_op))

        if not (len(col_ops) == len(col_names) == len(col_types) ==
                len(col_idxs)):
            sys.exit(
                'EXITING: The number of column names, numbers, types, and '
                'operations must match: [%s], [%s], [%s], [%s]\n' %
                (args.col_names, args.col_extracts, args.col_types,
                 args.col_operations))

        return col_names, col_types, col_ops, col_idxs

    if args.db is None:
        if parser is not None:
            parser.print_help()
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    if not os.path.exists(args.db):
        sys.stderr.write("Error: cannot find database file.")
        sys.exit(1)
    if not os.path.exists(args.anno_file):
        sys.stderr.write("Error: cannot find annotation file.")
        sys.exit(1)

    conn = sqlite3.connect(args.db)
    conn.row_factory = sqlite3.Row  # allow us to refer to columns by name
    conn.isolation_level = None

    # try/finally so the connection is released even when validation exits
    # or an annotator raises.
    try:
        if args.anno_type == "boolean":
            col_names = _validate_args(args)
            annotate_variants_bool(args, conn, col_names)
        elif args.anno_type == "count":
            col_names = _validate_args(args)
            annotate_variants_count(args, conn, col_names)
        elif args.anno_type == "extract":
            if args.col_extracts is None:
                sys.exit("You must specify which column to "
                         "extract from your annotation file.")
            col_names, col_types, col_ops, col_idxs = _validate_extract_args(
                args)
            annotate_variants_extract(args, conn, col_names, col_types,
                                      col_ops, col_idxs)
        else:
            sys.exit("Unknown column type requested. Exiting.")
    finally:
        conn.close()

    # index on the newly created columns
    # (identifiers cannot be bound as SQL parameters, hence the formatting)
    for col_name in col_names:
        index_name = col_name + "idx"
        with database_transaction(args.db) as c:
            # Drop first so that re-annotating an existing column does not
            # fail with "index already exists".
            c.execute('''drop index if exists %s''' % (index_name))
            c.execute('''create index %s on variants(%s)''' %
                      (index_name, col_name))
Пример #5
0
def annotate(parser, args):
    """Annotate the ``variants`` table from ``args.anno_file`` and index the new columns.

    Dispatches on ``args.anno_type``:

    * ``"boolean"`` -> ``annotate_variants_bool``
    * ``"count"``   -> ``annotate_variants_count``
    * ``"extract"`` -> ``annotate_variants_extract``

    After the annotation step, an index named ``<column>idx`` is (re)created
    on ``variants`` for every annotated column.

    Args:
        parser: argument parser used to print help when no database is
            given.  May be ``None`` (the validators below treat ``None``
            as a legal value).
        args: parsed options; reads ``db``, ``anno_file``, ``anno_type``,
            ``col_names``, ``col_types``, ``col_operations``,
            ``col_extracts`` and ``region_only``.

    Raises:
        ValueError: an option is missing, unsupported or inconsistent.
        RuntimeError: ``anno_type`` is unknown, or "extract" was requested
            without a column to extract from a non-``.vcf.gz`` file.
    """
    check_dependencies("annotate", [["tabix", "-h"], ["bgzip", "-h"]])

    def _validate_args(args):
        """Check the options for "-a boolean" / "-a count"; return the column-name list."""
        if (args.col_operations or args.col_types or args.col_extracts):
            raise ValueError(
                'You must not specify a column type (-t), op (-o) or extract (-e) when '
                'using \"-a boolean\" or \"-a count\".\n')

        # Without this guard a missing -c crashes with AttributeError on None.split.
        if not args.col_names:
            raise ValueError('You must specify a column name (-c) when '
                             'using \"-a boolean\" or \"-a count\".\n')

        col_names = args.col_names.split(',')
        if len(col_names) > 1:
            raise ValueError('You may only specify a single column name (-c) '
                             'when using \"-a boolean\" or \"-a count\".\n')

        if not args.anno_file.endswith(
            ('.vcf', '.vcf.gz')) and args.region_only and parser is not None:
            raise ValueError(
                'You may only specify --region-only when annotation is a VCF.')

        return col_names

    def _validate_extract_args(args):
        """Check the options for "-a extract"; return (names, types, ops, idxs) lists."""
        if args.anno_file.endswith(('.vcf', '.vcf.gz')):
            # For VCF annotations, names and extracts default to one another.
            if not args.col_names:
                args.col_names = args.col_extracts
            elif not args.col_extracts:
                args.col_extracts = args.col_names
        elif args.region_only and parser is not None:
            # NOTE(review): the trailing "1" looks like a leftover marker that
            # distinguishes this error from the one in _validate_args --
            # confirm before changing the message.
            raise ValueError(
                'You may only specify --region-only when annotation is a VCF.1'
            )

        if not args.col_types:
            raise ValueError('need to give column types ("-t")\n')
        # Raise a ValueError instead of AttributeError on None.split below.
        if not args.col_operations:
            raise ValueError('need to give column operations ("-o")\n')
        if not args.col_extracts:
            raise ValueError('need to give columns to extract ("-e")\n')
        if not args.col_names:
            raise ValueError('need to give column names ("-c")\n')

        col_ops = args.col_operations.split(',')
        col_idxs = args.col_extracts.split(',')

        col_names = args.col_names.split(',')
        col_types = args.col_types.split(',')

        supported_types = ['text', 'float', 'integer']
        for col_type in col_types:
            if col_type not in supported_types:
                raise ValueError('Column type [%s] not supported.\n' %
                                 (col_type))

        supported_ops = op_funcs.keys()

        for col_op in col_ops:
            if col_op not in supported_ops:
                raise ValueError('Column operation [%s] not supported.\n' %
                                 (col_op))

        if not (len(col_ops) == len(col_names) == len(col_types) ==
                len(col_idxs)):
            raise ValueError(
                'The number of column names, numbers, types, and '
                'operations must match: [%s], [%s], [%s], [%s]\n' %
                (args.col_names, args.col_extracts, args.col_types,
                 args.col_operations))

        return col_names, col_types, col_ops, col_idxs

    if args.db is None:
        if parser is not None:
            parser.print_help()
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    if not os.path.exists(args.anno_file):
        sys.stderr.write("Error: cannot find annotation file.")
        sys.exit(1)

    conn, metadata = database.get_session_metadata(args.db)

    # The validators raise to the caller, so close the connection in a
    # finally block instead of leaking it on every rejected request.
    try:
        if args.anno_type == "boolean":
            col_names = _validate_args(args)
            annotate_variants_bool(args, conn, metadata, col_names)
        elif args.anno_type == "count":
            col_names = _validate_args(args)
            annotate_variants_count(args, conn, metadata, col_names)
        elif args.anno_type == "extract":
            if args.col_extracts is None and not args.anno_file.endswith(
                    '.vcf.gz'):
                raise RuntimeError("You must specify which column to "
                                   "extract from your annotation file.")
            col_names, col_types, col_ops, col_idxs = _validate_extract_args(
                args)
            annotate_variants_extract(args, conn, metadata, col_names,
                                      col_types, col_ops, col_idxs)
        else:
            raise RuntimeError("Unknown column type requested. Exiting.")
    finally:
        conn.close()

    # index on the newly created columns
    # (identifiers cannot be bound as SQL parameters, hence the formatting)
    for col_name in col_names:
        index_name = col_name + "idx"
        with database_transaction(args.db) as c:
            c.execute('''drop index if exists %s''' % (index_name))
            c.execute('''create index %s on variants(%s)''' %
                      (index_name, col_name))
Пример #6
0
def annotate(parser, args):
    """Annotate the ``variants`` table from ``args.anno_file`` and index the new columns.

    Dispatches on ``args.anno_type``:

    * ``"boolean"`` -> ``annotate_variants_bool``
    * ``"count"``   -> ``annotate_variants_count``
    * ``"extract"`` -> ``annotate_variants_extract``

    After the annotation step, an index named ``<column>idx`` is (re)created
    on ``variants`` for every annotated column.

    Args:
        parser: argument parser used to print help when no database is
            given; ``None`` is tolerated.
        args: parsed options; reads ``db``, ``anno_file``, ``anno_type``,
            ``col_names``, ``col_types``, ``col_operations`` and
            ``col_extracts``.

    Exits via ``sys.exit`` on any invalid or missing option.
    """

    def _validate_args(args):
        """Check the options for "-a boolean" / "-a count"; return the column-name list."""
        if (args.col_operations or args.col_types or args.col_extracts):
            sys.exit('EXITING: You may only specify a column name (-c) when '
                     'using \"-a boolean\" or \"-a count\".\n')

        # Without this guard a missing -c crashes with AttributeError on None.split.
        if not args.col_names:
            sys.exit('EXITING: You must specify a column name (-c) when '
                     'using \"-a boolean\" or \"-a count\".\n')

        col_names = args.col_names.split(',')
        if len(col_names) > 1:
            sys.exit('EXITING: You may only specify a single column name (-c) '
                     'when using \"-a boolean\" or \"-a count\".\n')
        return col_names

    def _validate_extract_args(args):
        """Check the options for "-a extract"; return (names, types, ops, idxs) lists."""
        # Fail with a message instead of AttributeError on None.split below.
        if not args.col_types:
            sys.exit('EXITING: need to give column types ("-t")\n')
        if not args.col_operations:
            sys.exit('EXITING: need to give column operations ("-o")\n')
        if not args.col_names:
            sys.exit('EXITING: need to give column names ("-c")\n')

        col_ops = args.col_operations.split(',')
        col_names = args.col_names.split(',')
        col_types = args.col_types.split(',')
        col_idxs = args.col_extracts.split(',')

        supported_types = ['text', 'float', 'integer']
        for col_type in col_types:
            if col_type not in supported_types:
                sys.exit('EXITING: Column type [%s] not supported.\n' %
                         (col_type))

        supported_ops = ['mean', 'median', 'mode', 'min', 'max', 'first',
                         'last', 'list', 'uniq_list']
        for col_op in col_ops:
            if col_op not in supported_ops:
                sys.exit('EXITING: Column operation [%s] not supported.\n' %
                         (col_op))

        if not (len(col_ops) == len(col_names) ==
                len(col_types) == len(col_idxs)):
            sys.exit('EXITING: The number of column names, numbers, types, and '
                     'operations must match: [%s], [%s], [%s], [%s]\n' %
                     (args.col_names, args.col_extracts, args.col_types, args.col_operations))

        return col_names, col_types, col_ops, col_idxs

    if args.db is None:
        if parser is not None:
            parser.print_help()
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    if not os.path.exists(args.db):
        sys.stderr.write("Error: cannot find database file.")
        sys.exit(1)
    if not os.path.exists(args.anno_file):
        sys.stderr.write("Error: cannot find annotation file.")
        sys.exit(1)

    conn = sqlite3.connect(args.db)
    conn.row_factory = sqlite3.Row  # allow us to refer to columns by name
    conn.isolation_level = None

    # try/finally so the connection is released even when validation exits
    # or an annotator raises.
    try:
        if args.anno_type == "boolean":
            col_names = _validate_args(args)
            annotate_variants_bool(args, conn, col_names)
        elif args.anno_type == "count":
            col_names = _validate_args(args)
            annotate_variants_count(args, conn, col_names)
        elif args.anno_type == "extract":
            if args.col_extracts is None:
                sys.exit("You must specify which column to "
                         "extract from your annotation file.")
            col_names, col_types, col_ops, col_idxs = _validate_extract_args(args)
            annotate_variants_extract(args, conn, col_names, col_types, col_ops, col_idxs)
        else:
            sys.exit("Unknown column type requested. Exiting.")
    finally:
        conn.close()

    # index on the newly created columns
    # (identifiers cannot be bound as SQL parameters, hence the formatting)
    for col_name in col_names:
        index_name = col_name + "idx"
        with database_transaction(args.db) as c:
            # Drop first so that re-annotating an existing column does not
            # fail with "index already exists".
            c.execute('''drop index if exists %s''' % (index_name))
            c.execute('''create index %s on variants(%s)''' % (index_name, col_name))