Пример #1
0
def longest(args):
    """
    %prog longest bedfile fastafile

    Select longest feature within overlapping piles.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is runtime-visible; keep it short and keep the %prog line intact.
    #
    # args: list of command-line tokens (options plus exactly two
    #       positionals: bedfile and fastafile).
    # Side effects: writes `<prefix>.longest.ids`, then calls some() asking
    #       for `<prefix>.longest.bed`, where <prefix> is bedfile without its
    #       last extension. Returns None.
    from jcvi.formats.sizes import Sizes

    p = OptionParser(longest.__doc__)
    p.add_option("--maxsize", default=20000, type="int", help="Limit max size")
    p.add_option("--minsize", default=60, type="int", help="Limit min size")
    p.add_option("--precedence",
                 default="Medtr",
                 help="Accessions with prefix take precedence")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bedfile, fastafile = args
    maxsize = opts.maxsize
    minsize = opts.minsize
    prec = opts.precedence
    # presumably merges overlapping features and joins their names with ";"
    # (see the split below) -- confirm against mergeBed()
    mergedbed = mergeBed(bedfile, nms=True)
    # used below as an accession -> size lookup via .get()
    sizes = Sizes(fastafile).mapping
    bed = Bed(mergedbed)

    pf = bedfile.rsplit(".", 1)[0]
    ids = set()
    for b in bed:
        accns = b.accn.split(";")
        # Accessions carrying the precedence prefix shadow all others in
        # the same pile; fall back to the full pile when none match.
        preferred = [x for x in accns if x.startswith(prec)]
        if preferred:
            accns = preferred
        # Accessions missing from the size table count as size 0; anything
        # at or above maxsize is discarded before picking the winner.
        candidates = [(sizes.get(x, 0), x) for x in accns]
        candidates = [(size, x) for size, x in candidates if size < maxsize]
        if not candidates:
            continue
        # Tuple comparison: largest size wins, ties broken by accession name.
        max_size, max_accn = max(candidates)
        if max_size < minsize:
            continue
        ids.add(max_accn)

    newids = remove_isoforms(ids)
    logging.debug(
        "Remove isoforms: before={0} after={1}".format(len(ids), len(newids)))

    longestidsfile = pf + ".longest.ids"
    # Context manager guarantees the handle is closed even if the write
    # fails; fw.write() replaces the Python-2-only `print >> fw` form while
    # producing the same bytes (joined ids plus one trailing newline).
    with open(longestidsfile, "w") as fw:
        fw.write("\n".join(newids) + "\n")
    logging.debug(
        "A total of {0} records written to `{1}`.".format(
            len(newids), longestidsfile))

    longestbedfile = pf + ".longest.bed"
    some([
        bedfile, longestidsfile, "--outfile={0}".format(longestbedfile),
        "--no_strip_names"
    ])
Пример #2
0
Файл: bed.py Проект: yangjl/jcvi
def longest(args):
    """
    %prog longest bedfile fastafile

    Select longest feature within overlapping piles.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage
    # string, so it is part of the program's output; do not reword it.
    #
    # args: list of command-line tokens (options plus exactly two
    #       positionals: bedfile and fastafile).
    # Side effects: writes `<prefix>.longest.ids`, then calls some() asking
    #       for `<prefix>.longest.bed`, where <prefix> is bedfile without its
    #       last extension. Returns None.
    from jcvi.formats.sizes import Sizes

    p = OptionParser(longest.__doc__)
    p.add_option("--maxsize", default=20000, type="int",
                 help="Limit max size")
    p.add_option("--minsize", default=60, type="int",
                 help="Limit min size")
    p.add_option("--precedence", default="Medtr",
                 help="Accessions with prefix take precedence")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bedfile, fastafile = args
    maxsize = opts.maxsize
    minsize = opts.minsize
    prec = opts.precedence
    # presumably merges overlapping features and joins their names with ";"
    # (see the split below) -- confirm against mergeBed()
    mergedbed = mergeBed(bedfile, nms=True)
    # used below as an accession -> size lookup via .get()
    sizes = Sizes(fastafile).mapping
    bed = Bed(mergedbed)

    pf = bedfile.rsplit(".", 1)[0]
    ids = set()
    for b in bed:
        accns = b.accn.split(";")
        # Prefer accessions with the precedence prefix when the pile has
        # any; otherwise consider every accession in the pile.
        preferred = [x for x in accns if x.startswith(prec)]
        if preferred:
            accns = preferred
        # Unknown accessions default to size 0; sizes >= maxsize are
        # dropped before the longest one is chosen.
        candidates = [(sizes.get(x, 0), x) for x in accns]
        candidates = [(size, x) for size, x in candidates if size < maxsize]
        if not candidates:
            continue
        # max() on (size, accn) tuples: longest first, name as tie-breaker.
        max_size, max_accn = max(candidates)
        if max_size < minsize:
            continue
        ids.add(max_accn)

    newids = remove_isoforms(ids)
    logging.debug(
        "Remove isoforms: before={0} after={1}".format(len(ids), len(newids)))

    longestidsfile = pf + ".longest.ids"
    # `with` closes the file even when the write raises, and fw.write()
    # avoids the Python-2-only `print >> fw` statement while emitting the
    # same content (ids joined by newlines plus a trailing newline).
    with open(longestidsfile, "w") as fw:
        fw.write("\n".join(newids) + "\n")
    logging.debug(
        "A total of {0} records written to `{1}`.".format(
            len(newids), longestidsfile))

    longestbedfile = pf + ".longest.bed"
    some([bedfile, longestidsfile, "--outfile={0}".format(longestbedfile),
          "--no_strip_names"])