Пример #1
0
def compute_pval_thresh(real_pvals, null_pvals, verbose):
    ''' map each distinct observed p-value to an empirical false discovery
    rate (FDR) estimate.

    for every distinct value in real_pvals, num_pvals_better() is called
    once against real_pvals and once against null_pvals; the estimate is
    the null count divided by the real count (0.0 when the real count is
    zero).

    real_pvals: collection of observed p-values (hashable; must support
        len() when verbose is set)
    null_pvals: collection of p-values from the null / randomized data
    verbose: show a progress bar and pass the flag on to num_pvals_better()

    returns: dictionary of floats, keyed by observed p-value'''

    pval_thresh = dict()

    if verbose:
        label = ">> computing thresholds: "
        progress = ProgressBar(len(real_pvals), label=label)

    for obs_pval in real_pvals:

        # advance once per input value, including duplicates, so that the
        # bar matches the len(real_pvals) it was sized with
        if verbose: progress.next()

        # each distinct p-value only needs to be evaluated once
        if obs_pval in pval_thresh: continue

        real_pvals_better = num_pvals_better(obs_pval, real_pvals, verbose)
        null_pvals_better = num_pvals_better(obs_pval, null_pvals, verbose)

        if real_pvals_better == 0:
            # avoid dividing by zero
            qval = 0.0
        else:
            # force true division: with two integer counts, "/" truncates
            # under Python 2 and the ratio would collapse to a whole number
            qval = float(null_pvals_better) / real_pvals_better

        pval_thresh[obs_pval] = qval

    if verbose: progress.end()

    return pval_thresh
Пример #2
0
def find_trimmed_peaks(peak_data, genome, chrom, trackname, max_width, verbose):
    ''' trim each peak to the sub-interval reported by
    max_contiguous_signal().

    peak_data: iterable of (start, end, pval) tuples; must support len()
        when verbose is set
    genome, chrom, trackname: used to fetch the signal for a peak as
        genome[chrom.name][start:end, trackname]
    max_width: peaks wider than this are passed through untrimmed
    verbose: show a progress bar

    returns: list of (start, end, pval) tuples in input order; pval is
    carried over unchanged from the untrimmed peak'''

    # TODO: recalculate pvalue for the trimmed and return that

    # XXX: there is likely a faster way to do this with native numpy
    # functions. the cost of trimming grows with window size, which is why
    # peaks wider than max_width are skipped below

    trimmed_peaks = []

    if verbose:
        progress = ProgressBar(len(peak_data),
                               label=">> trimming peaks: ")

    for peak_start, peak_end, pval in peak_data:

        # advance for every peak, including the ones skipped below, so the
        # bar matches the len(peak_data) it was sized with
        if verbose: progress.next()

        # too wide to trim: keep the peak as-is
        if peak_end - peak_start > max_width:
            trimmed_peaks.append((peak_start, peak_end, pval))
            continue

        continuous = genome[chrom.name][peak_start:peak_end, trackname]
        trim_peak_start, trim_peak_end = max_contiguous_signal(continuous)

        # the returned offsets are applied relative to the original peak
        # start to obtain the trimmed coordinates
        trimmed_start = peak_start + trim_peak_start
        trimmed_end = peak_start + trim_peak_end

        trimmed_peaks.append((trimmed_start, trimmed_end, pval))

    # clean up progress bar
    if verbose: progress.end()

    return trimmed_peaks
Пример #3
0
def identify_local_peaks(chromosome, track_index, peak_width, lambda_base,
                         log_pval_thresh, local_lambda, shuffle_data, verbose):
    ''' identify peaks within a chromosome.

    each supercontig is scanned in consecutive, non-overlapping windows of
    int(peak_width) positions. a window is kept when calc_log_pvalue()
    returns a truthy value that is not below log_pval_thresh.

    chromosome: object providing itercontinuous(), which yields
        (supercontig, continuous) pairs
    track_index: column of continuous holding the track to scan
    peak_width: window size
    lambda_base: baseline lambda passed to calc_log_pvalue()
    log_pval_thresh: minimum log p-value for a window to be reported
    local_lambda: if set, use the largest of the calc_local_lambdas()
        values and lambda_base instead of lambda_base alone
    shuffle_data: if set, shuffle each supercontig's signal before scanning
    verbose: show a progress bar and pass the flag on to calc_log_pvalue()

    returns: list of (start, end, log_pvalue) tuples in chromosomal
    coordinates'''

    local_peaks = []

    if verbose:
        progress = ProgressBar(num_supercontigs(chromosome),
                               label=">> calling peaks: ")

    for supercontig, continuous in chromosome.itercontinuous():

        if verbose: progress.next()

        # load data into memory
        track_continuous = continuous[:, track_index]

        # shuffle the data if requested
        if shuffle_data: shuffle(track_continuous)

        # generate window ranges i.e. peak widths
        peak_range = xrange(0, len(track_continuous), int(peak_width))

        for peak_start in peak_range:

            # nominal window end; may run past the end of the supercontig
            # for the final window
            peak_end = int(peak_start + peak_width)

            # slicing clips the final window to the available data
            peak_data = track_continuous[peak_start:peak_end]

            # skip the window if it is empty
            # NOTE(review): if nansum is numpy.nansum, NumPy releases after
            # 1.9.0 return 0 rather than NaN for an all-NaN slice, in which
            # case this check no longer skips anything - confirm
            if isnan(nansum(peak_data)): continue

            # calculate local lambdas and choose max
            if local_lambda:
                lambda_values = calc_local_lambdas(peak_width,
                                                   track_continuous,
                                                   peak_start, peak_end)
                lambda_values.append(lambda_base)
                lambda_value = max(lambda_values)
            else:
                lambda_value = lambda_base

            # initial pvalue for the region
            log_pvalue = calc_log_pvalue(peak_data, lambda_value,
                                         verbose)

            # check that the pvalue is defined & that it meets the
            # threshold. note that "not log_pvalue" also rejects a value
            # of exactly 0
            if not log_pvalue or log_pvalue < log_pval_thresh: continue

            # calculate chromosomal coords relative to current contig
            # coords. the end is derived from the number of positions
            # actually in the window, so a clipped final window is not
            # reported as extending past the supercontig's data
            chrom_start = supercontig.start + peak_start
            chrom_end = chrom_start + len(peak_data)

            # save current peak
            local_peaks.append((chrom_start, chrom_end, log_pvalue))

    if verbose: progress.end()

    return local_peaks