Example #1
import argparse
import toolshed as ts

# run() is defined elsewhere in the source file
def main():
    p = argparse.ArgumentParser(description=__doc__,
                   formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("-p", dest="pvals", help="BED containing all the p values"
                  " used to generate `regions`")
    p.add_argument("-r", dest="regions", help="BED containing all the regions")
    p.add_argument("-s", "--step", dest="step", type=int, default=50,
            help="step size for acf calculation. should be the same "
            " value as the step sent to -d arg for acf")
    p.add_argument("-c", dest="c", help="column number containing the p-value"
                   " of interest", type=str, default=-1)
    p.add_argument("-z", dest="z", help="use z-score correction",
                    action="store_true")
    args = p.parse_args()
    if not (args.regions and args.pvals):
        import sys
        sys.exit(not p.print_help())
    header = ts.nopen(args.regions).next()
    if header.startswith("#") or (not header.split("\t")[2].isdigit()):
        print "%s\tslk_p\tslk_sidak_p" % (header.rstrip("\r\n"),)

    header = ts.header(args.pvals)
    if args.c in header:
        args.c = header.index(args.c) + 1
    else:
        args.c = int(args.c)
    return run(args)
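
A minimal standalone sketch of the `-c` handling in main() above (the header list is hypothetical): a column given by name is resolved against the p-value header, anything else is parsed as a plain, possibly negative, column number.

def resolve_col(c, header):
    # name -> 1-based index into the header; otherwise an integer column spec
    return header.index(c) + 1 if c in header else int(c)

assert resolve_col("p.value", ["chrom", "start", "end", "p.value"]) == 4
assert resolve_col("-1", ["chrom", "start", "end", "p.value"]) == -1
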
Example #2
import pandas
from toolshed import reader, header

# get_comp and get_ab are helpers defined elsewhere in the source file
def main(tests, xref, head):
    df = pandas.read_table(xref, index_col=[0], compression=get_comp(xref))
    a, b = get_ab(tests)
    out_order = header(tests)
    if head:
        print "\t".join(out_order)
    for l in reader(tests):
        try:
            if (df[a][l['SiteA']] or df[b][l['SiteA']] or
                    df[a][l['SiteB']] or df[b][l['SiteB']]):
                print "\t".join(l[h] for h in out_order)
        except KeyError:
            # definitely not a primary site
            pass
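
A tiny sketch of the cross-reference lookup used in the try block above, with a hypothetical one-column table indexed by site name; df[col][row] selects the column first, then the row label.

import pandas
df = pandas.DataFrame({'primary': [True, False]}, index=['siteA', 'siteB'])
assert df['primary']['siteA']
assert not df['primary']['siteB']
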
Example #3
from toolshed import reader, header

# descriptions_to_dict is a helper defined elsewhere in the source file
def main(snpeff, descriptions):
    snpeff_header = header(snpeff)
    additional_columns = header(descriptions)
    full_header = snpeff_header + additional_columns
    descriptions_dict = descriptions_to_dict(descriptions)
    print "\t".join(full_header)
    for l in reader(snpeff):
        # try Gene_ID first, then fall back to Gene_name
        add = dict()
        try:
            add = descriptions_dict[l['Gene_ID']]
        except KeyError:
            try:
                add = descriptions_dict[l['Gene_name']]
            except KeyError:
                print "\t".join(l[h] for h in snpeff_header)
                continue
        full_line = dict(l.items() + add.items())
        # make sure a 'status' field is always present, even if empty
        if not full_line.get('status'):
            full_line['status'] = ""
        print "\t".join(full_line[h] for h in full_header)
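
Note that dict(l.items() + add.items()) is a Python 2 idiom (under Python 3, items() views cannot be concatenated; dict(l, **add) would be the closest equivalent). A quick check that later keys win in the merge:

base = {'a': 1, 'status': ''}
extra = {'status': 'ok'}
assert dict(base.items() + extra.items()) == {'a': 1, 'status': 'ok'}
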
Example #4
from toolshed import reader, header
def uniprot(args):
    """Add Uniprot annotation to gene list. Args: genes, uniprotdb, column"""
    uniprot_db = {}
    uniprot_header = header(args.uniprotdb)
    for entry in reader(args.uniprotdb):
        for gene in entry['Gene names'].split():
            uniprot_db[gene] = entry
    for entry in reader(args.genes, header=False):
        uniprot_fields = []
        for gene in entry[int(args.column) - 1].split(","):
            uniprot = uniprot_db.get(gene)
            if uniprot:
                for h in uniprot_header:
                    uniprot_fields.append(uniprot[h])
        # a tab is needed between the original fields and the annotation;
        # this also prints once per input row rather than after the loop
        print "\t".join(entry) + "\t" + "\t".join(map(str, uniprot_fields))
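
A small sketch of the alias table built at the top of uniprot(): every space-separated token in 'Gene names' points at the same record, so any alias resolves to the full entry (the record below is made up).

entry = {'Gene names': 'TP53 P53', 'Function': 'tumor suppressor'}
uniprot_db = {}
for gene in entry['Gene names'].split():
    uniprot_db[gene] = entry
assert uniprot_db['P53'] is uniprot_db['TP53']
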
Example #5
import toolshed as ts
def get_col_num(c, bed_file=None):
    """
    adjust the column number so it does the intuitive thing
    for the command-line interface
    >>> get_col_num(4)
    3
    >>> get_col_num(-1)
    -1
    """
    if isinstance(c, basestring) and c.isdigit():
        c = int(c)
    if isinstance(c, (int, long)):
        return c if c < 0 else (c - 1)
    header = ts.header(bed_file)
    assert c in header
    return header.index(c)
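
The numeric paths need no file access, so the doctest above can be checked inline:

assert get_col_num("4") == 3
assert get_col_num(-1) == -1
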
Example #6
import gzip
import os.path as op
from toolshed import reader, header
def main(result_files):
    common_header = header(result_files[0])[:-1]
    samples = set()
    for result_file in result_files:
        samples.add(op.basename(result_file).rsplit("_", 2)[0])
    # leaves one entry per unique sample
    for sample in samples:
        # open a new file to facilitate the join
        out = gzip.open("{sample}.combined.txt.gz".format(sample=sample), "wb")
        i = 1
        print >>out, "\t".join(common_header)
        for result_file in result_files:
            if not op.basename(result_file).startswith(sample): continue
            for toks in reader(result_file, header=common_header):
                if not toks['Functionality'] == 'productive': continue
                toks['Sequence number'] = str(i)
                print >>out, "\t".join(toks[h] for h in common_header)
                i += 1
        out.close()
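
The sample-name extraction above relies on rsplit: peeling off the last two underscore-delimited chunks keeps a sample prefix intact even when it contains underscores itself (the file name below is made up).

assert "patient_A_run1_R1.txt".rsplit("_", 2)[0] == "patient_A"
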
Example #7
from toolshed import reader, header

# make_dict and gtf2dict are helpers defined elsewhere in the source file
def main(args):
    annotation_dict = make_dict(args.annotated_peaks)
    gtf = gtf2dict(args.gtf)
    blast_header = header(args.blast)
    # append only these annotation columns onto the blast header
    annotation_save = "baseMeanA baseMeanB pval padj sequence".split()
    blast_header.extend(annotation_save)
    # stuff one may want from gtf
    gtf_save = "symbol ensg enst".split()
    blast_header.extend(gtf_save)
    print "\t".join(blast_header)
    for b in reader(args.blast, header=True):
        annotation_lookup = annotation_dict[b['qseqid']]
        # ENST00000576218_-
        enst = b['sseqid'].split("_")[0]
        try:
            gtf_lookup = gtf[enst]
        except KeyError:
            gtf_lookup = {'enst':enst, 'ensg':'-', 'symbol':'-'}
        d = dict(b.items() + annotation_lookup.items() + gtf_lookup.items())
        print "\t".join([d[i] for i in blast_header])
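
The transcript id is recovered from subject ids shaped like the comment above shows; a quick check:

assert "ENST00000576218_-".split("_")[0] == "ENST00000576218"
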
Example #8
import os
import pickle
from toolshed import reader, header

# make_ens_table is a helper defined elsewhere in the source file
def main(deseq_output, ensembl_gtf):

    p = "%s.pickle" % ensembl_gtf
    if os.path.exists(p):
        with open(p, 'rb') as handle:
            ensembl_translation = pickle.load(handle)
    else:
        ensembl_translation = make_ens_table(ensembl_gtf)
        with open(p, 'wb') as handle:
            pickle.dump(ensembl_translation, handle)

    h = header(deseq_output, sep=",")
    h[0] = 'ensg'
    h = [i.strip('"') for i in h]
    h.append('type')

    print ",".join(h)

    for toks in reader(deseq_output, sep=","):
        toks['ensg'] = toks['']  # the gene id sits in an unnamed first column
        toks['type'] = ensembl_translation[toks['ensg']]
        print ",".join(toks[i] for i in h)
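
The pickle block at the top of main() is a build-once cache; a minimal generic version of the same pattern (names here are illustrative, not from the source):

import os
import pickle

def cached(path, build):
    # reuse the pickled result when present; otherwise build and save it
    if os.path.exists(path):
        with open(path, 'rb') as handle:
            return pickle.load(handle)
    result = build()
    with open(path, 'wb') as handle:
        pickle.dump(result, handle)
    return result
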
Example #9
from itertools import groupby
from operator import itemgetter
import toolshed as ts

# fix_bed and ilogit are helpers defined elsewhere in the source file
def filter(p_bed, region_bed, max_p=None, region_p=None, p_col_name="P.Value", coef_col_name="logFC"):

    ph = ts.header(p_bed)
    if (ph[1] + ph[2]).isdigit():
        raise Exception("need header in p-value file to run filter")
    assert ph[1] == "start" and ph[2] == "end" and ph[0] == "chrom", ("must have chrom, start, end header for", p_bed)
    ph = ["p" + h for h in ph]

    rh = ts.header(region_bed)
    header = not (rh[1] + rh[2]).isdigit()

    if isinstance(p_col_name, str) and p_col_name.isdigit():
        p_col_name = int(p_col_name) - 1

    if isinstance(p_col_name, (int, long)):
        p_col_name = ph[p_col_name][1:]

    a = dict(p_bed=p_bed, region_bed=region_bed)
    a["p_bed"] = fix_bed(a["p_bed"])
    a["header"] = ""

    j = 0
    for group, plist in groupby(
        ts.reader(
            "|bedtools intersect -b %(p_bed)s \
                         -a %(region_bed)s -wo %(header)s"
            % a,
            header=rh + ph,
        ),
        itemgetter("chrom", "start", "end"),
    ):
        plist = list(plist)

        if region_p:
            r = plist[0]  # first cols are all the same
            region_p_key = "slk_sidak_p" if "slk_sidak_p" in r else "z_sidak_p" if "z_sidak_p" in r else None
            if region_p_key is None:
                raise Exception("no slk_sidak_p or z_sidak_p column found")
            if float(r[region_p_key]) > region_p:
                continue

        try:
            plist = [
                x
                for x in plist
                if (int(x["start"]) <= int(x["pstart"]) <= int(x["pend"]))
                and ((int(x["start"]) <= int(x["pend"]) <= int(x["end"])))
            ]
        except:
            print plist
            raise
        tscores = [float(row["pt"]) for row in plist if "pt" in row]

        if max_p:
            if any(float(row["p" + p_col_name]) > max_p for row in plist):
                continue

        ngt05 = sum(1 for row in plist if float(row["p" + p_col_name]) > 0.05)

        # logic to try to find t and coef headers and skip if not found
        extra_header = []
        extra = []
        if tscores:
            tpos = sum(1 for t in tscores if t > 0)
            tneg = sum(1 for t in tscores if t < 0)
            tpn = "%i/%i" % (tpos, tneg)

            tsum = str(sum(tscores))
            extra_header += ["t.pos/t.neg", "t.sum"]
            extra += [tpn, tsum]
        else:
            tsum = tpn = "NA"
        if "p" + coef_col_name not in plist[0] and "pcoefficient" in plist[0]:
            coef_col_name = "coefficient"
        if "p" + coef_col_name in plist[0]:
            coef = sum(float(row["p" + coef_col_name]) for row in plist) / len(plist)

            # since we probably had the data logit transformed, here we
            # do the inverse and subtract 0.5 since ilogit(0) == 0.5
            icoef = (sum(ilogit(float(row["p" + coef_col_name])) for row in plist) / len(plist)) - 0.5
            extra_header += ["avg.diff", "ilogit.diff"]
            extra += ["%.3f" % coef, "%.3f" % icoef]
        else:
            coef = icoef = float("nan")

        frow = [plist[0][h] for h in rh] + extra
        if j == 0:
            yield rh + extra_header
            j = 1
        yield frow
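
ilogit is not shown in this snippet; a definition consistent with the comment above (ilogit(0) == 0.5) would be the standard inverse logit:

import math

def ilogit(x):
    # inverse logit: map a log-odds value back into (0, 1)
    return 1.0 / (1.0 + math.exp(-x))

assert ilogit(0) == 0.5
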
Example #10
import sys
import os.path as op
import toolshed as ts
def pipeline(col_num,
             step,
             dist,
             acf_dist,
             prefix,
             threshold,
             seed,
             bed_files,
             mlog=True,
             region_filter_p=1,
             region_filter_n=None,
             genome_control=False,
             db=None,
             use_fdr=True):
    sys.path.insert(0, op.join(op.dirname(__file__), ".."))
    from cpv import acf, slk, fdr, peaks, region_p, stepsize, filter
    from cpv._common import genome_control_adjust, genomic_control, bediter
    import operator

    if step is None:
        step = min(acf_dist, stepsize.stepsize(bed_files, col_num))
        print >> sys.stderr, "calculated stepsize as: %i" % step

    lags = range(1, acf_dist, step)
    lags.append(lags[-1] + step)

    prefix = prefix.rstrip(".")
    putative_acf_vals = acf.acf(bed_files,
                                lags,
                                col_num,
                                simple=False,
                                mlog=mlog)
    acf_vals = []
    # go out to the max requested distance, but stop adding lags once the
    # autocorrelation drops below 0.04.
    for a in putative_acf_vals:
        # a is ((lmin, lmax), (corr, N)); keep the first low-correlation
        # lag, then stop.
        if a[1][0] < 0.04 and len(acf_vals) > 2: break
        acf_vals.append(a)
        if a[1][0] < 0.04 and len(acf_vals): break

    # save the arguments that this was called with.
    with open(prefix + ".args.txt", "w") as fh:
        print >> fh, " ".join(sys.argv[1:]) + "\n"
        import datetime
        print >> fh, "date: %s" % datetime.datetime.today()
        from .__init__ import __version__
        print >> fh, "version:", __version__

    with open(prefix + ".acf.txt", "w") as fh:
        acf_vals = acf.write_acf(acf_vals, fh)
        print >> sys.stderr, "wrote: %s" % fh.name
    print >> sys.stderr, "ACF:\n", open(prefix + ".acf.txt").read()

    spvals, opvals = [], []
    with ts.nopen(prefix + ".slk.bed.gz", "w") as fhslk:
        fhslk.write('#chrom\tstart\tend\tp\tregion-p\n')
        for row in slk.adjust_pvals(bed_files, col_num, acf_vals):
            fhslk.write("%s\t%i\t%i\t%.4g\t%.4g\n" % row)
            opvals.append(row[-2])
            spvals.append(row[-1])

    print >> sys.stderr, "# original lambda: %.2f" % genomic_control(opvals)
    del opvals

    gc_lambda = genomic_control(spvals)
    print >> sys.stderr, "wrote: %s with lambda: %.2f" % (fhslk.name,
                                                          gc_lambda)

    if genome_control:
        fhslk = ts.nopen(prefix + ".slk.gc.bed.gz", "w")
        adj = genome_control_adjust(
            [d['p'] for d in bediter(prefix + ".slk.bed.gz", -1)])
        for i, line in enumerate(ts.nopen(prefix + ".slk.bed.gz")):
            print >> fhslk, "%s\t%.5g" % (line.rstrip("\r\n"), adj[i])

        fhslk.close()
        print >> sys.stderr, "wrote: %s" % fhslk.name

    with ts.nopen(prefix + ".fdr.bed.gz", "w") as fh:
        fh.write('#chrom\tstart\tend\tp\tregion-p\tregion-q\n')
        for bh, l in fdr.fdr(fhslk.name, -1):
            fh.write("%s\t%.4g\n" % (l.rstrip("\r\n"), bh))
        print >> sys.stderr, "wrote: %s" % fh.name
    fregions = prefix + ".regions.bed.gz"
    with ts.nopen(fregions, "w") as fh:
        list(
            peaks.peaks(prefix + ".fdr.bed.gz", -1 if use_fdr else -2,
                        threshold, seed, dist, fh, operator.le))
    n_regions = sum(1 for _ in ts.nopen(fregions))
    print >> sys.stderr, "wrote: %s (%i regions)" % (fregions, n_regions)
    if n_regions == 0:
        sys.exit()

    with ts.nopen(prefix + ".regions-p.bed.gz", "w") as fh:
        N = 0
        fh.write("#chrom\tstart\tend\tmin_p\tn_probes\tz_p\tz_sidak_p\n")
        # use -2 for original, uncorrected p-values in slk.bed
        for region_line, slk_p, slk_sidak_p, sim_p in region_p.region_p(
                prefix + ".slk.bed.gz", prefix + ".regions.bed.gz", -2, step):
            fh.write("%s\t%.4g\t%.4g\n" % (region_line, slk_p, slk_sidak_p))
            fh.flush()
            N += int(slk_sidak_p < 0.05)
        print >>sys.stderr, "wrote: %s, (regions with corrected-p < 0.05: %i)" \
                % (fh.name, N)

    regions_bed = fh.name
    header = ts.header(bed_files[0])
    #if all(h in header for h in ('t', 'start', 'end')):
    if region_filter_n is None: region_filter_n = 0
    with ts.nopen(prefix + ".regions-t.bed", "w") as fh:
        N = 0
        for i, toks in enumerate(
                filter.filter(bed_files[0], regions_bed, p_col_name=col_num)):
            if i == 0: toks[0] = "#" + toks[0]
            else:
                if float(toks[6]) > region_filter_p: continue
                if int(toks[4]) < region_filter_n: continue
                #if region_filter_t and "/" in toks[7]:
                #    # t-pos/t-neg. if the lower one is > region_filter_t?
                #    vals = map(int, toks[7].split("/"))
                #    if min(vals) > region_filter_t: continue

                N += 1
            print >> fh, "\t".join(toks)
        print >>sys.stderr, ("wrote: %s, (regions with region-p "
                            "< %.3f and n-probes >= %i: %i)") \
                % (fh.name, region_filter_p, region_filter_n, N)

    try:
        from cpv import manhattan
        regions = manhattan.read_regions(fh.name)

        manhattan.manhattan(prefix + ".slk.bed.gz",
                            3,
                            prefix.rstrip(".") + ".manhattan.png",
                            False, ['#959899', '#484B4C'],
                            "",
                            False,
                            None,
                            regions=regions,
                            bonferonni=False)
    except ImportError:
        pass  # matplotlib is not installed

    if db is not None:
        from cruzdb import Genome
        g = Genome(db)
        lastf = fh.name
        with open(prefix + ".anno.%s.bed" % db, "w") as fh:
            fh.write('#')
            g.annotate(lastf, ("refGene", "cpgIslandExt"),
                       out=fh,
                       feature_strand=True,
                       parallel=len(spvals) > 500)
        print >> sys.stderr, "wrote: %s annotated with %s" % (fh.name, db)
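
A sketch of the lag windows built near the top of pipeline() (acf_dist=500 and step=50 are assumed values): the trailing append extends the final window one step past acf_dist.

lags = range(1, 500, 50)    # a plain list under Python 2
lags.append(lags[-1] + 50)
assert lags[-2:] == [451, 501]
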
Example #11
import sys
import toolshed as ts
from sklearn import preprocessing
import numpy as np

regions, weighted = [], []
totlen = 0.0

for d in ts.reader(sys.argv[1]):
    totlen += int(d['end']) - int(d['start'])
    regions.append(d)

header = ts.header(sys.argv[1])
print "\t".join(header) + "\t" + "weighted_pct"
pct = 100.0
regionlength = 0

opct = regions[0]['resid_pctile']
for d in regions:
    regionlength += int(d['end']) - int(d['start'])
    if d['resid_pctile'] != opct:
        pct -= regionlength / totlen * 100
        regionlength = 0
        opct = d['resid_pctile']
    weighted.append(pct)

X_train = np.array(weighted).reshape(len(weighted), 1)
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 100))
resid_pctile = min_max_scaler.fit_transform(X_train)
for i, d in enumerate(regions):
    print "\t".join(d[h] for h in header) + "\t" + "%.9f" % resid_pctile[i]
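
MinMaxScaler with feature_range=(0, 100) rescales the weighted values linearly onto [0, 100]; a toy check of that behavior:

import numpy as np
from sklearn import preprocessing

scaler = preprocessing.MinMaxScaler(feature_range=(0, 100))
out = scaler.fit_transform(np.array([[1.0], [2.0], [3.0]]))
assert out[0][0] == 0.0 and out[-1][0] == 100.0
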
Example #12
from itertools import groupby
from operator import itemgetter
import toolshed as ts

# fix_bed and ilogit are helpers defined elsewhere in the source file
def filter(p_bed,
           region_bed,
           max_p=None,
           region_p=None,
           p_col_name="P.Value",
           coef_col_name="logFC"):

    ph = ts.header(p_bed)
    if (ph[1] + ph[2]).isdigit():
        raise Exception('need header in p-value file to run filter')
    assert ph[1] == 'start' and ph[2] == 'end' and ph[0] == 'chrom', \
            ('must have chrom, start, end header for', p_bed)
    ph = ['p' + h for h in ph]

    rh = ts.header(region_bed)
    header = not (rh[1] + rh[2]).isdigit()

    if isinstance(p_col_name, str) and p_col_name.isdigit():
        p_col_name = int(p_col_name) - 1

    if isinstance(p_col_name, (int, long)):
        p_col_name = ph[p_col_name][1:]

    a = dict(p_bed=p_bed, region_bed=region_bed)
    a['p_bed'] = fix_bed(a['p_bed'])
    a['header'] = ""

    j = 0
    for group, plist in groupby(
            ts.reader('|bedtools intersect -b %(p_bed)s \
                         -a %(region_bed)s -wo %(header)s' % a,
                      header=rh + ph), itemgetter('chrom', 'start', 'end')):
        plist = list(plist)

        if region_p:
            r = plist[0]  # first cols are all the same
            region_p_key = 'slk_sidak_p' if 'slk_sidak_p' in r \
                                         else 'z_sidak_p' if 'z_sidak_p' in r \
                                         else None
            if region_p_key is None:
                raise Exception("no slk_sidak_p or z_sidak_p column found")
            if float(r[region_p_key]) > region_p:
                continue

        try:
            plist = [
                x for x in plist
                if (int(x['start']) <= int(x['pstart']) <= int(x['pend'])) and
                ((int(x['start']) <= int(x['pend']) <= int(x['end'])))
            ]
        except:
            print(plist)
            raise
        tscores = [float(row['pt']) for row in plist if 'pt' in row]

        if max_p:
            if any(float(row['p' + p_col_name]) > max_p for row in plist):
                continue

        ngt05 = sum(1 for row in plist if float(row['p' + p_col_name]) > 0.05)

        # logic to try to find t and coef headers and skip if not found
        extra_header = []
        extra = []
        if tscores:
            tpos = sum(1 for t in tscores if t > 0)
            tneg = sum(1 for t in tscores if t < 0)
            tpn = "%i/%i" % (tpos, tneg)

            tsum = str(sum(tscores))
            extra_header += ["t.pos/t.neg", "t.sum"]
            extra += [tpn, tsum]
        else:
            tsum = tpn = "NA"
        if 'p' + coef_col_name not in plist[0] and 'pcoefficient' in plist[0]:
            coef_col_name = 'coefficient'
        if 'p' + coef_col_name in plist[0]:
            coef = (sum(float(row['p' + coef_col_name])
                        for row in plist) / len(plist))

            # since we probably had the data logit transformed, here we
            # do the inverse and subtract 0.5 since ilogit(0) == 0.5
            icoef = (
                sum(ilogit(float(row['p' + coef_col_name]))
                    for row in plist) / len(plist)) - 0.5
            extra_header += ["avg.diff", "ilogit.diff"]
            extra += ["%.3f" % coef, "%.3f" % icoef]
        else:
            coef = icoef = float('nan')

        frow = [plist[0][h] for h in rh] + extra
        if j == 0:
            yield rh + extra_header
            j = 1
        yield frow
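
The grouping key in this version deserves a note: itemgetter('chrom', 'start', 'end') builds a tuple key, so consecutive bedtools-intersect rows for the same region fall into one group (the rows below are made up).

from itertools import groupby
from operator import itemgetter

rows = [{'chrom': '1', 'start': '0', 'end': '10', 'p': '0.1'},
        {'chrom': '1', 'start': '0', 'end': '10', 'p': '0.2'}]
for key, grp in groupby(rows, itemgetter('chrom', 'start', 'end')):
    assert key == ('1', '0', '10') and len(list(grp)) == 2
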
Example #13
import sys
import os.path as op
import toolshed as ts
def pipeline(col_num, step, dist, acf_dist, prefix, threshold, seed,
        bed_files, mlog=True, region_filter_p=1, region_filter_n=None,
        genome_control=False, db=None, use_fdr=True):
    sys.path.insert(0, op.join(op.dirname(__file__), ".."))
    from cpv import acf, slk, fdr, peaks, region_p, stepsize, filter
    from cpv._common import genome_control_adjust, genomic_control, bediter
    import operator


    if step is None:
        step = min(acf_dist, stepsize.stepsize(bed_files, col_num))
        print >>sys.stderr, "calculated stepsize as: %i" % step

    lags = range(1, acf_dist, step)
    lags.append(lags[-1] + step)

    prefix = prefix.rstrip(".")
    putative_acf_vals = acf.acf(bed_files, lags, col_num, simple=False,
                                mlog=mlog)
    acf_vals = []
    # go out to the max requested distance, but stop adding lags once the
    # autocorrelation drops below 0.04.
    for a in putative_acf_vals:
        # a is ((lmin, lmax), (corr, N)); keep the first low-correlation
        # lag, then stop.
        if a[1][0] < 0.04 and len(acf_vals) > 2: break
        acf_vals.append(a)
        if a[1][0] < 0.04 and len(acf_vals): break

    # save the arguments that this was called with.
    with open(prefix + ".args.txt", "w") as fh:
        print >>fh, " ".join(sys.argv[1:]) + "\n"
        import datetime
        print >>fh, "date: %s" % datetime.datetime.today()
        from .__init__ import __version__
        print >>fh, "version:", __version__

    with open(prefix + ".acf.txt", "w") as fh:
        acf_vals = acf.write_acf(acf_vals, fh)
        print >>sys.stderr, "wrote: %s" % fh.name
    print >>sys.stderr, "ACF:\n", open(prefix + ".acf.txt").read()

    spvals, opvals = [], []
    with ts.nopen(prefix + ".slk.bed.gz", "w") as fhslk:
        fhslk.write('#chrom\tstart\tend\tp\tregion-p\n')
        for row in slk.adjust_pvals(bed_files, col_num, acf_vals):
            fhslk.write("%s\t%i\t%i\t%.4g\t%.4g\n" % row)
            opvals.append(row[-2])
            spvals.append(row[-1])

    print >>sys.stderr, "# original lambda: %.2f" % genomic_control(opvals)
    del opvals

    gc_lambda = genomic_control(spvals)
    print >>sys.stderr, "wrote: %s with lambda: %.2f" % (fhslk.name, gc_lambda)

    if genome_control:
        fhslk = ts.nopen(prefix + ".slk.gc.bed.gz", "w")
        adj = genome_control_adjust([d['p'] for d in bediter(prefix + ".slk.bed.gz", -1)])
        for i, line in enumerate(ts.nopen(prefix + ".slk.bed.gz")):
            print >>fhslk, "%s\t%.5g" % (line.rstrip("\r\n"), adj[i])

        fhslk.close()
        print >>sys.stderr, "wrote: %s" % fhslk.name

    with ts.nopen(prefix + ".fdr.bed.gz", "w") as fh:
        fh.write('#chrom\tstart\tend\tp\tregion-p\tregion-q\n')
        for bh, l in fdr.fdr(fhslk.name, -1):
            fh.write("%s\t%.4g\n" % (l.rstrip("\r\n"), bh))
        print >>sys.stderr, "wrote: %s" % fh.name
    fregions = prefix + ".regions.bed.gz"
    with ts.nopen(fregions, "w") as fh:
        list(peaks.peaks(prefix + ".fdr.bed.gz", -1 if use_fdr else -2, threshold, seed,
            dist, fh, operator.le))
    n_regions = sum(1 for _ in ts.nopen(fregions))
    print >>sys.stderr, "wrote: %s (%i regions)" % (fregions, n_regions)
    if n_regions == 0:
        sys.exit()

    with ts.nopen(prefix + ".regions-p.bed.gz", "w") as fh:
        N = 0
        fh.write("#chrom\tstart\tend\tmin_p\tn_probes\tz_p\tz_sidak_p\n")
        # use -2 for original, uncorrected p-values in slk.bed
        for region_line, slk_p, slk_sidak_p, sim_p in region_p.region_p(
                               prefix + ".slk.bed.gz",
                               prefix + ".regions.bed.gz", -2,
                               step):
            fh.write("%s\t%.4g\t%.4g\n" % (region_line, slk_p, slk_sidak_p))
            fh.flush()
            N += int(slk_sidak_p < 0.05)
        print >>sys.stderr, "wrote: %s, (regions with corrected-p < 0.05: %i)" \
                % (fh.name, N)

    regions_bed = fh.name
    header = ts.header(bed_files[0])
    #if all(h in header for h in ('t', 'start', 'end')):
    if region_filter_n is None: region_filter_n = 0
    with ts.nopen(prefix + ".regions-t.bed", "w") as fh:
        N = 0
        for i, toks in enumerate(filter.filter(bed_files[0],
            regions_bed, p_col_name=col_num)):
            if i == 0: toks[0] = "#" + toks[0]
            else:
                if float(toks[6]) > region_filter_p: continue
                if int(toks[4]) < region_filter_n: continue
                #if region_filter_t and "/" in toks[7]:
                #    # t-pos/t-neg. if the lower one is > region_filter_t?
                #    vals = map(int, toks[7].split("/"))
                #    if min(vals) > region_filter_t: continue

                N += 1
            print >>fh, "\t".join(toks)
        print >>sys.stderr, ("wrote: %s, (regions with region-p "
                            "< %.3f and n-probes >= %i: %i)") \
                % (fh.name, region_filter_p, region_filter_n, N)

    try:
        from cpv import manhattan
        regions = manhattan.read_regions(fh.name)

        manhattan.manhattan(prefix + ".slk.bed.gz", 3, prefix.rstrip(".") + ".manhattan.png",
                         False, ['#959899', '#484B4C'], "", False, None,
                         regions=regions, bonferonni=False)
    except ImportError:
        pass  # matplotlib is not installed


    if db is not None:
        from cruzdb import Genome
        g = Genome(db)
        lastf = fh.name
        with open(prefix + ".anno.%s.bed" % db, "w") as fh:
            fh.write('#')
            g.annotate(lastf, ("refGene", "cpgIslandExt"), out=fh,
                    feature_strand=True, parallel=len(spvals) > 500)
        print >>sys.stderr, "wrote: %s annotated with %s" % (fh.name, db)
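
ts.nopen is used above to read and write gzipped files transparently; a minimal sketch of the same round trip with the standard library alone (demo.bed.gz is a hypothetical path):

import gzip

with gzip.open("demo.bed.gz", "wb") as fh:
    fh.write("#chrom\tstart\tend\n")
assert gzip.open("demo.bed.gz").read().startswith("#chrom")
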