Exemplo n.º 1
0
def tfam(args):
    """
    Write every sample stored in the database to stdout in TFAM layout.

    Format reference:
    http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml

    ``args.db`` is passed to GeminiQuery.  Each row of the ``samples``
    table becomes one output line holding family_id, name, paternal_id,
    maternal_id, sex and phenotype, in that order, separated by single
    spaces.
    """
    # Column order is the order in which the fields appear on each line.
    tfam_columns = ('family_id', 'name', 'paternal_id',
                    'maternal_id', 'sex', 'phenotype')

    query = ("select family_id, name, paternal_id, maternal_id, "
             "sex, phenotype from samples")
    gq = GeminiQuery(args.db)
    gq.run(query)
    for row in gq:
        # Values are passed through str() so non-string values can be joined.
        print(" ".join(str(row[column]) for column in tfam_columns))
Exemplo n.º 2
0
def tfam(args):
    """
    Dump the sample table of the database as TFAM records on stdout.

    See http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml for the
    format description.

    The database is opened through GeminiQuery using ``args.db``.  One
    line is printed per sample: family_id, name, paternal_id, maternal_id,
    sex and phenotype, joined by single spaces.
    """
    sample_query = ("select family_id, name, paternal_id, maternal_id, "
                    "sex, phenotype from samples")

    samples = GeminiQuery(args.db)
    samples.run(sample_query)

    for sample in samples:
        record = [
            sample['family_id'],
            sample['name'],
            sample['paternal_id'],
            sample['maternal_id'],
            sample['sex'],
            sample['phenotype'],
        ]
        # str() each value before joining; values need not be strings.
        print(" ".join(map(str, record)))
Exemplo n.º 3
0
def genewise(db, gt_filters, filter=None, columns=None, min_filters=None,
             min_variants=1,
             grouper="gene"):
    """
    Print exonic, non-LOW-impact variants, group by group, after applying
    genotype filters.

    Rows are selected from the ``variants`` table, grouped with
    ``it.groupby`` on ``grouper`` and each group is handed to
    ``gen_results``.  Every result it yields is printed; a tab-separated
    header built from the first result's ``print_fields`` is printed once
    before it.

    Args:
        db: path to the database file; it must exist.
        gt_filters: sequence of genotype filter expressions (strings).  Each
            one is checked with ``GeminiQuery._is_gt_filter_safe``, rewritten
            by ``_correct_genotype_filter`` and compiled once.
        filter: optional SQL fragment ANDed onto the WHERE clause.  It is
            inserted into the query verbatim, so it must come from a trusted
            source.
        columns: comma-separated names of the columns to select.  Empty
            names (``None``, ``""``, stray commas) are ignored.
        min_filters: forwarded to ``gen_results``; ``None`` is sent as 0.
        min_variants: forwarded to ``gen_results``.
        grouper: column name, or a callable taking a row, used as the
            grouping key.  A column name that was not requested is selected
            automatically.

    Raises:
        IOError: if ``db`` does not exist.
        ValueError: if a column name contains ';' or a genotype filter is
            rejected by the safety check.
    """
    # Explicit raises instead of asserts: these checks guard SQL text and
    # code that is compiled for eval, so they must survive ``python -O``.
    if not os.path.exists(db):
        raise IOError("database not found: %s" % db)

    # Drop empty names so that columns=None or a trailing comma does not
    # leave a blank entry, which would render as "SELECT , ..." below.
    orig_columns = [c for c in (x.strip() for x in (columns or "").split(","))
                    if c]
    added_cols = add_cols(orig_columns, "||".join(gt_filters))
    # Only a column *name* can be added to the SELECT list; a callable
    # grouper has to work with the columns the caller asked for.
    if isinstance(grouper, basestring) and grouper not in orig_columns:
        added_cols.append(grouper)
    columns = orig_columns + added_cols
    if any(';' in c for c in columns):
        raise ValueError("column names must not contain ';'")

    # NOTE: we could make the WHERE part customizable.
    query = "SELECT {columns} FROM variants WHERE (is_exonic = 1 AND impact_severity != 'LOW')"
    if filter:
        query += " AND  " + filter
    # NOTE(review): rows are always ordered by gene, while groupby below only
    # merges *adjacent* rows with equal keys -- a grouper other than "gene"
    # may yield fragmented groups.  Confirm whether that is intended.
    query += " ORDER BY CHROM, gene"

    gq = GeminiQuery(db, include_gt_cols=True)

    # use the GeminiQuery machinery to transform to something that's eval'able
    # then compile once for speed.
    cleaned_filters = []
    for gt_filter in gt_filters:
        if not gq._is_gt_filter_safe(gt_filter):
            raise ValueError("unsafe genotype filter: %s" % gt_filter)
        gt_filter = gq._correct_genotype_filter(gt_filter)
        cleaned_filters.append(compile(gt_filter, gt_filter, 'eval'))

    gq.run(query.format(columns=", ".join(columns)))

    if isinstance(grouper, basestring):
        grouper = operator.itemgetter(grouper)

    user_dict = dict(sample_info=gq.sample_info)
    header_printed = False
    for _groupkey, grp in it.groupby(gq, grouper):
        # Materialise the group once; groupby's sub-iterator is single-use.
        for x in gen_results(list(grp), cleaned_filters, min_filters or 0,
                             min_variants, columns, user_dict=user_dict):
            # Hide the helper columns that were added only for filtering or
            # grouping.
            # NOTE(review): 'gene' is hard-coded, so a non-default grouper
            # column is removed from the output as well -- confirm intent.
            for c in added_cols:
                if c != 'gene':
                    del x.print_fields[c]
            if not header_printed:
                print("\t".join(x.print_fields.keys()))
                header_printed = True
            print(x)
Exemplo n.º 4
0
def genewise(db,
             gt_filters,
             filter=None,
             columns=None,
             min_filters=None,
             min_variants=1,
             grouper="gene"):
    """
    Report variants that pass genotype filters, one group at a time.

    Selects rows from ``variants`` where ``is_exonic = 1`` and
    ``impact_severity != 'LOW'``, groups consecutive rows with
    ``it.groupby`` using ``grouper`` and passes each group to
    ``gen_results``.  Each yielded result is printed, preceded once by a
    tab-separated header taken from the first result's ``print_fields``.

    Args:
        db: path to the database file; it must exist.
        gt_filters: sequence of genotype filter expressions (strings).  Each
            is validated with ``GeminiQuery._is_gt_filter_safe``, rewritten
            by ``_correct_genotype_filter`` and compiled once.
        filter: optional SQL fragment ANDed onto the WHERE clause.  It goes
            into the query unmodified, so it must come from a trusted source.
        columns: comma-separated names of the columns to select.  Empty
            names (``None``, ``""``, stray commas) are ignored.
        min_filters: forwarded to ``gen_results``; ``None`` is sent as 0.
        min_variants: forwarded to ``gen_results``.
        grouper: column name, or a callable taking a row, used as the
            grouping key.  A column name that was not requested is selected
            automatically.

    Raises:
        IOError: if ``db`` does not exist.
        ValueError: if a column name contains ';' or a genotype filter is
            rejected by the safety check.
    """
    # These checks protect SQL text and eval-compiled code, so they are
    # explicit raises: an ``assert`` would be stripped under ``python -O``.
    if not os.path.exists(db):
        raise IOError("database not found: %s" % db)

    # Skip blank names; otherwise columns=None or a trailing comma leaves an
    # empty entry and the query starts with "SELECT , ...".
    orig_columns = []
    for name in (columns or "").split(","):
        name = name.strip()
        if name:
            orig_columns.append(name)

    added_cols = add_cols(orig_columns, "||".join(gt_filters))
    # A callable grouper is not a column and must stay out of the SELECT
    # list; only a missing column name is added.
    if isinstance(grouper, basestring) and grouper not in orig_columns:
        added_cols.append(grouper)
    columns = orig_columns + added_cols
    if any(';' in c for c in columns):
        raise ValueError("column names must not contain ';'")

    # NOTE: we could make the WHERE part customizable.
    query = "SELECT {columns} FROM variants WHERE (is_exonic = 1 AND impact_severity != 'LOW')"
    if filter:
        query += " AND  " + filter
    # NOTE(review): ordering is fixed to gene although grouping uses
    # ``grouper``; groupby only merges adjacent equal keys, so another
    # grouper may produce split groups.  Confirm whether that is intended.
    query += " ORDER BY CHROM, gene"

    gq = GeminiQuery(db, include_gt_cols=True)

    # use the GeminiQuery machinery to transform to something that's eval'able
    # then compile once for speed.
    cleaned_filters = []
    for gt_filter in gt_filters:
        if not gq._is_gt_filter_safe(gt_filter):
            raise ValueError("unsafe genotype filter: %s" % gt_filter)
        gt_filter = gq._correct_genotype_filter(gt_filter)
        cleaned_filters.append(compile(gt_filter, gt_filter, 'eval'))

    gq.run(query.format(columns=", ".join(columns)))

    if isinstance(grouper, basestring):
        grouper = operator.itemgetter(grouper)

    user_dict = dict(sample_info=gq.sample_info)
    header_printed = False
    for _groupkey, grp in it.groupby(gq, grouper):
        # One list per group is enough; the groupby sub-iterator is
        # single-use, so it is materialised exactly once here.
        for x in gen_results(list(grp),
                             cleaned_filters,
                             min_filters or 0,
                             min_variants,
                             columns,
                             user_dict=user_dict):
            # Strip the helper columns that were selected only for filtering
            # or grouping.
            # NOTE(review): 'gene' is hard-coded, so a non-default grouper
            # column is dropped from the output too -- confirm intent.
            for c in added_cols:
                if c != 'gene':
                    del x.print_fields[c]
            if not header_printed:
                print("\t".join(x.print_fields.keys()))
                header_printed = True
            print(x)