Пример #1
0
def read_pvalues(bedfilename, log_pvalues, verbose):
    ''' read in p-values from a bed file score field.

    bedfilename: path of the bed file; opened with maybe_gzip_open
    log_pvalues: if true, scores are used as-is and treated as already
        log transformed (biggest = most significant); otherwise each
        score is converted with -1 * log10(score)
    verbose: if true, progress messages are written to stderr

    returns: list of floats on the log-transformed scale, sorted by
    signifance (most significant, i.e. largest, first)'''
    if verbose:
        sys.stderr.write(">> reading p-values from %s ..\n" % bedfilename)

    pvals = []

    with maybe_gzip_open(bedfilename) as bedfile:
        for datum in read_bed(bedfile):
            # always work with floats so values sort numerically and
            # agree with the float(datum.score) used in calc_qvalues
            score = float(datum.score)

            if log_pvalues:
                pval = score
            else:
                # raw p-value: move it onto the same -log10 scale
                pval = -1 * log10(score)

            pvals.append(pval)

    if verbose:
        sys.stderr.write(">> read %d p-values\n" % len(pvals))

    # whichever branch ran above, every value is now log transformed, so
    # the biggest values are the most significant and must come first
    pvals.sort(reverse=True)

    return pvals
def get_region_counts(bedfilenames, verbose):
    ''' count the number of times each base is covered by a peak call.

    bedfilenames: iterable of bed file paths; each one is opened with
        maybe_gzip_open and parsed with read_bed
    verbose: if true, a progress message is printed to stderr per file

    returns: defaultdict keyed by chrom, each value a Counter keyed by
    position holding the number of regions covering that position'''
    coverage = defaultdict(Counter)

    for path in bedfilenames:
        if verbose:
            print >>sys.stderr, ">> loading regions from %s" % \
                path

        with maybe_gzip_open(path) as handle:
            for interval in read_bed(handle):
                # positions chromStart .. chromEnd - 1 are covered
                covered = range(interval.chromStart, interval.chromEnd)

                # zero-length regions are skipped so that they do not
                # create an empty entry for their chrom
                if covered:
                    coverage[interval.chrom].update(covered)

    return coverage
Пример #3
0
def calc_qvalues(real_bedfilename, null_bedfilename, log_pvalues, verbose):
    ''' assign q-values to the regions in a bed file and print them.

    p-values are read from the score fields of the real and null bed
    files, thresholds are computed with compute_pval_thresh, and the
    real bed file is then read a second time to write one tab-delimited
    line per region to stdout:

        chrom, chromStart, chromEnd, name, pval, strand, norm_qval

    real_bedfilename: bed file with the observed p-values
    null_bedfilename: bed file with the null p-values
    log_pvalues: if true, scores are used as-is (already log
        transformed); otherwise they are converted with -1 * log10
    verbose: if true, progress messages are written to stderr

    raises: AssertionError if either file yields no p-values'''

    # read in real p-values.
    real_pvals = read_pvalues(real_bedfilename, log_pvalues, verbose)

    # read in null p-values
    null_pvals = read_pvalues(null_bedfilename, log_pvalues, verbose)

    num_real = float(len(real_pvals))
    num_null = float(len(null_pvals))

    # make sure both are defined (also guards the division below)
    assert num_real and num_null, \
        "real and null bed files must both contain p-values"

    # normalization factor to account for different numbers of real and
    # null p-values
    frac_real = num_real / num_null

    if verbose:
        sys.stderr.write(">> normalization factor: %.5f\n" % frac_real)

    # compute pvalue thresholds
    # NOTE(review): pval_thresh is indexed by p-value below, so it is
    # presumably a mapping from p-value to q-value -- confirm against
    # compute_pval_thresh
    pval_thresh = compute_pval_thresh(real_pvals, null_pvals, verbose)

    # go back over real pvalues and assign qvalues
    with maybe_gzip_open(real_bedfilename) as bedfile:
        for datum in read_bed(bedfile):

            score = float(datum.score)

            if log_pvalues:
                pval = score
            else:
                # raw p-value: same -log10 transform as read_pvalues
                pval = -1 * log10(score)

            qval = pval_thresh[pval]

            norm_qval = qval * frac_real

            # print in table format
            fields = (datum.chrom, datum.chromStart, datum.chromEnd,
                      datum.name, pval, datum.strand, norm_qval)
            sys.stdout.write('\t'.join(map(str, fields)) + '\n')