Exemplo n.º 1
0
def outputAllWindows(infile, outfile):
    '''output all windows as a bed file with the l2fold change
    as a score.

    Every row of `infile` is written as one tab-separated line holding
    ``contig``, ``start``, ``end`` and the ``l2fold`` value formatted
    with ``%6.4f``.

    Both files are handled through context managers so that they are
    closed even if a row cannot be converted.

    Arguments
    ---------
    infile : string
        Input filename in :term:`tsv` format. Typically the output
        from :mod:`scripts/runExpression`. Needs the columns
        ``contig``, ``start``, ``end`` and ``l2fold``.
    outfile : string
        Output filename in :term:`bed` format.

    Raises
    ------
    ValueError
        If an ``l2fold`` value cannot be parsed as a float.
    '''
    # The output is opened first, as before, so that it is created
    # (empty) even when the input turns out to be unreadable.
    with IOTools.open_file(outfile, "w") as outf, \
            IOTools.open_file(infile) as inf:
        for line in IOTools.iterate(inf):
            outf.write("\t".join((line.contig, line.start, line.end,
                                  "%6.4f" % float(line.l2fold))) + "\n")
Exemplo n.º 2
0
    def read():
        """Yield one ``DATA`` record per row read from ``options.stdin``.

        The window coordinates are recovered from the ``test_id`` column
        with the regular expression in ``options.pattern_window``, which
        has to capture three groups: contig, start and end. Start and
        end are converted to integers, the numeric columns to floats and
        ``significant`` to an integer. The last field of every record is
        initialised to 0.

        NOTE(review): nothing in this function filters out DESeq/EdgeR
        messages at the top of the input; rows whose ``test_id`` does
        not match the pattern make the ``.groups()`` call fail.
        """
        window_pattern = re.compile(options.pattern_window)

        for row in IOTools.iterate(options.stdin):
            contig, first, last = window_pattern.match(row.test_id).groups()

            record = (
                row.test_id,
                contig,
                int(first),
                int(last),
                row.treatment_name,
                float(row.treatment_mean),
                float(row.treatment_std),
                row.control_name,
                float(row.control_mean),
                float(row.control_std),
                float(row.pvalue),
                float(row.qvalue),
                float(row.l2fold),
                float(row.fold),
                int(row.significant),
                row.status,
                0,
            )
            yield DATA._make(record)
Exemplo n.º 3
0
def buildDMRStats(infiles, outfile, method, fdr_threshold=None):
    '''build dmr summary statistics.

    This method works from output files created by Expression.py
    (method="deseq" or method="edger") or runMEDIPS (method="medips")

    For every (treatment_name, control_name) pair found in `infiles`
    this method counts the number of tested windows, the number of
    windows per status value and - for windows with status "OK" - the
    number of windows that are up/down (``l2fold`` above/below 0),
    2fold up/down (``l2fold`` above 1/below -1) and significant
    (``significant == "1"``).

    All input files are tallied before anything is written. Each
    treatment/control pair is therefore reported exactly once and the
    header covers the counters seen in any of the files. Counts of
    pairs that occur in several files are summed.

    Arguments
    ---------
    infiles : list
        List of filenames of tables with DMR output. Each table needs
        the columns ``treatment_name``, ``control_name``, ``status``,
        ``l2fold`` and ``significant``.
    outfile : string
        Output filename. Tab separated file summarizing the counts,
        one row per treatment/control pair. The columns are
        ``method``, ``treatment``, ``control``, followed by the sorted
        counter names and the sorted status values. Only counters and
        status values that occurred at least once get a column.
        If `infiles` is empty, the file is created but left empty.
    method : string
        Method name. Copied into the first column of every row.
    fdr_threshold : float
        FDR threshold to apply. Currently unused.

    Raises
    ------
    ValueError
        If the ``l2fold`` value of a window with status "OK" cannot be
        parsed as a float.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    infiles = list(infiles)

    for infile in infiles:
        with IOTools.open_file(infile) as inf:
            for line in IOTools.iterate(inf):
                key = (line.treatment_name, line.control_name)

                r, s = results[key], status[key]
                r["tested"] += 1
                s[line.status] += 1

                # fold changes are only evaluated for windows that
                # were tested successfully
                if line.status != "OK":
                    continue

                l2fold = float(line.l2fold)
                up = l2fold > 0
                down = l2fold < 0
                fold2up = l2fold > 1
                fold2down = l2fold < -1

                if up:
                    r["up"] += 1
                if down:
                    r["down"] += 1
                if fold2up:
                    r["l2fold_up"] += 1
                if fold2down:
                    r["l2fold_down"] += 1

                if line.significant == "1":
                    r["significant"] += 1
                    if up:
                        r["significant_up"] += 1
                    if down:
                        r["significant_down"] += 1
                    if fold2up or fold2down:
                        r["fold2"] += 1
                    if fold2up:
                        r["significant_l2fold_up"] += 1
                    if fold2down:
                        r["significant_l2fold_down"] += 1

    # column names: every counter/status observed for any pair
    header1 = sorted({x for r in results.values() for x in r})
    header2 = sorted({x for s in status.values() for x in s})

    with IOTools.open_file(outfile, "w") as outf:
        # without input files the output stays empty (no header)
        if infiles:
            header = ["method", "treatment", "control"]
            outf.write("\t".join(header + header1 + header2) + "\n")

            for key, r in results.items():
                treatment, control = key
                s = status[key]
                outf.write("%s\t%s\t%s\t" % (method, treatment, control))
                # counters a pair never hit are reported as 0 by the
                # defaultdict
                outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
                outf.write("\t".join([str(s[x]) for x in header2]) + "\n")