Пример #1
0
def main(argv=None):
    """Script main.

    Parses command line options in sys.argv, unless *argv* is given.

    Exactly two positional arguments are expected: the BED file to query
    (``-`` selects ``args.stdin``) and the BED file to index. For every
    entry of the first file, each overlapping entry found in the index of
    the second file is written to ``args.stdout`` as one tab-separated line.

    Raises:
        ValueError: if the number of positional arguments is not two.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.ArgumentParser(description=__doc__)

    parser.add_argument("--version", action='version', version="1.0")

    parser.add_argument("-o", "--output-section", dest="output", type=str,
                        choices=("full", "name"),
                        help="output either ``full`` overlapping entries, only the ``name``s.")

    parser.set_defaults(
        output="full",
    )

    # add common options (-h/--help, ...) and parse command line
    (args, unknown) = E.start(parser,
                              argv=argv,
                              unknowns=True)

    if len(unknown) != 2:
        raise ValueError("two arguments required")

    # stdin is owned by the caller/framework and must not be closed here
    reads_stdin = unknown[0] == "-"
    if reads_stdin:
        infile1 = args.stdin
    else:
        infile1 = iotools.open_file(unknown[0], "r")

    try:
        # the index is built in full here, so the handle can be released
        # immediately afterwards
        with iotools.open_file(unknown[1], "r") as infile2:
            idx = Bed.readAndIndex(infile2, with_values=True)

        outfile = args.stdout

        if args.output == "name":
            outfile.write("name1\tname2\n")

            def outf(entry):
                # only the first optional field of the entry is reported
                return entry.fields[0]
        else:
            outf = str

        for bed in Bed.iterator(infile1):
            try:
                overlaps = idx[bed.contig].find(bed.start, bed.end)
            except (KeyError, IndexError):
                # ignore missing contig and zero length intervals
                continue

            for o in overlaps:
                # o[2] is the third element of the hit - presumably the
                # indexed entry stored via with_values=True
                outfile.write("\t".join((outf(bed), outf(o[2]))) + "\n")
    finally:
        if not reads_stdin:
            infile1.close()

    E.stop()
Пример #2
0
    def __init__(self, filename, *args, **kwargs):
        """Set up the counter from a file of intervals.

        Reads *filename* into a per-track index and prepares two output
        headers per track: ``<track>_nover`` and ``<track>_bases``.

        Arguments
        ---------
        filename : string
            File with intervals to index; must not be None.
        *args, **kwargs
            Passed on unchanged to ``Counter.__init__``.
        """

        assert filename is not None,\
            "please supply filename for CounterOverlap"

        Counter.__init__(self, *args, **kwargs)

        self.filename = filename

        E.info("reading intervals from %s" % self.filename)

        # the index is built completely inside the ``with`` block, so the
        # file handle is released deterministically instead of being leaked
        with iotools.open_file(self.filename, "r") as infile:
            self.index = Bed.readAndIndex(infile, per_track=True)

        E.info("read intervals for %s tracks" % len(self.index))

        self.tracks = list(self.index.keys())
        # two columns per track, in track order
        self.headers = [
            template % track
            for track in self.tracks
            for template in ("%s_nover", "%s_bases")
        ]
Пример #3
0
 def buildIndex(self, filename):
     """Return the index that Bed.readAndIndex builds from *filename*.

     The file is opened for reading and closed again once the index has
     been built.
     """
     with iotools.open_file(filename, "r") as infile:
         return Bed.readAndIndex(infile)
Пример #4
0
 def __init__(self, filename):
     """Build a per-track index from *filename* and store it in mIndices.

     The file is opened for reading and closed again once the index has
     been built.
     """
     with iotools.open_file(filename, "r") as infile:
         self.mIndices = Bed.readAndIndex(infile, per_track=True)
Пример #5
0
def annotateCpGIslands(infiles, outfile):
    '''annotate transcript by absence/presence of CpG islands

    *infiles* is a pair ``(cpgfile, tssfile)``. The intervals in *cpgfile*
    are indexed; for every entry in *tssfile* a search window is built
    around the TSS using ``PARAMS["cpg_search_upstream"]`` and
    ``PARAMS["cpg_search_downstream"]`` and every indexed interval found in
    that window is written to *outfile*, together with its position
    relative to the TSS.

    A table of counters is written to ``outfile + ".summary"``.
    '''
    cpgfile, tssfile = infiles

    # the index is built in full here, so the handle can be closed right away
    with iotools.openFile(cpgfile) as cpg_handle:
        cpg = Bed.readAndIndex(cpg_handle)

    extension_upstream = PARAMS["cpg_search_upstream"]
    extension_downstream = PARAMS["cpg_search_downstream"]

    c = E.Counter()

    # context managers make sure both handles are closed even if a
    # malformed entry raises half-way through the loop
    with iotools.openFile(outfile, "w") as outf:
        outf.write(
            "transcript_id\tstrand\tstart\tend\trelative_start\trelative_end\n")

        with iotools.openFile(tssfile) as tss_handle:
            for tss in Bed.iterator(tss_handle):
                c.tss_total += 1

                # the window is anchored at tss.start on the plus strand and
                # at tss.end otherwise, with the extensions mirrored
                if tss.strand == "+":
                    start = tss.start - extension_upstream
                    end = tss.start + extension_downstream
                else:
                    start = tss.end - extension_downstream
                    end = tss.end + extension_upstream

                try:
                    matches = list(cpg[tss.contig].find(start, end))
                except KeyError:
                    # contig is not present in the index
                    c.promotor_without_matches += 1
                    continue

                if not matches:
                    c.promotor_without_matches += 1
                    continue

                c.promotor_output += 1
                for genome_start, genome_end, _ in matches:
                    c.matches_total += 1

                    length = genome_end - genome_start

                    # get relative location of match
                    if tss.strand == "+":
                        relative_start = genome_start - tss.start
                    else:
                        relative_start = tss.end - genome_end

                    relative_end = relative_start + length

                    outf.write("\t".join(
                        map(str, (tss.name, tss.strand,
                                  genome_start, genome_end,
                                  relative_start, relative_end))) + "\n")
                    c.matches_output += 1

    with iotools.openFile(outfile + ".summary", "w") as outf:
        outf.write("category\tcounts\n")
        outf.write(c.asTable() + "\n")

    E.info(c)
Пример #6
0
def main(argv=None):
    """Script main.

    Parses command line options in sys.argv, unless *argv* is given.

    Exactly two positional arguments are expected: the BED file to query
    (``-`` selects ``options.stdin``) and the BED file to index. For every
    entry of the first file, each overlapping entry found in the index of
    the second file is written to ``options.stdout`` as one tab-separated
    line.

    Raises:
        ValueError: if the number of positional arguments is not two.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version=
        "%prog version: $Id: bed2graph.py 2861 2010-02-23 17:36:32Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-o",
        "--output-section",
        dest="output",
        type="choice",
        choices=("full", "name"),
        help=
        "output either ``full`` overlapping entries, only the ``name``s. [default=%default]."
    )

    parser.set_defaults(output="full", )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError("two arguments required")

    # stdin is owned by the caller/framework and must not be closed here
    reads_stdin = args[0] == "-"
    if reads_stdin:
        infile1 = options.stdin
    else:
        infile1 = iotools.open_file(args[0], "r")

    try:
        # the index is built in full here, so the handle can be released
        # immediately afterwards
        with iotools.open_file(args[1], "r") as infile2:
            idx = Bed.readAndIndex(infile2, with_values=True)

        outfile = options.stdout

        if options.output == "name":
            outfile.write("name1\tname2\n")

            def outf(entry):
                # only the first optional field of the entry is reported
                return entry.fields[0]
        else:
            outf = str

        for bed in Bed.iterator(infile1):
            try:
                overlaps = idx[bed.contig].find(bed.start, bed.end)
            except (KeyError, IndexError):
                # ignore missing contig and zero length intervals
                continue

            for o in overlaps:
                # o[2] is the third element of the hit - presumably the
                # indexed entry stored via with_values=True
                outfile.write("\t".join((outf(bed), outf(o[2]))) + "\n")
    finally:
        if not reads_stdin:
            infile1.close()

    E.stop()