示例#1
0
文件: maker.py 项目: bennyyu/jcvi
def get_accuracy(query, gff_file, evidences_bed, sizesfile, type, key):
    """
    Evaluate the features of `gff_file` against `evidences_bed` inside a query
    range such as "chr1:1-10000" (sensitivity, specificity and accuracy).

    The GFF file is first converted through `get_bed_file(gff_file, type, key)`;
    the resulting BED file, the evidences BED file and the sizes file are then
    handed to `jcvi.formats.bed.evaluate` together with a `--query` option.
    Whatever `evaluate` returns is passed back to the caller unchanged.
    """
    from jcvi.formats.bed import evaluate

    features_bed = get_bed_file(gff_file, type, key)
    query_opt = "--query={0}".format(query)

    return evaluate([features_bed, evidences_bed, sizesfile, query_opt])
示例#2
0
def get_accuracy(query, gff_file, evidences_bed, sizesfile, type, key):
    """
    Evaluate the features of `gff_file` against `evidences_bed` inside a query
    range such as "chr1:1-10000" (sensitivity, specificity and accuracy).

    The GFF file is first converted through `get_bed_file(gff_file, type, key)`;
    the resulting BED file, the evidences BED file and the sizes file are then
    handed to `jcvi.formats.bed.evaluate` together with a `--query` option.
    Whatever `evaluate` returns is passed back to the caller unchanged.
    """
    from jcvi.formats.bed import evaluate

    features_bed = get_bed_file(gff_file, type, key)
    query_opt = "--query={0}".format(query)

    return evaluate([features_bed, evidences_bed, sizesfile, query_opt])
示例#3
0
def batcheval(args):
    """
    %prog batcheval model.ids gff_file evidences.bed fastafile

    Get the accuracy for a list of models against evidences in the range of the
    genes. For example:

    $ %prog batcheval isoforms.ids all.gff3 proteins.bed scaffolds.fasta

    The scores for the models are written, one "id<TAB>score" line per model,
    to a file named after the ids file with its extension replaced by
    ".scores" (isoforms.scores in the example above). A scratch BED file with
    the same prefix and a ".bed" extension is overwritten for every model.
    """
    from jcvi.formats.bed import evaluate
    from jcvi.formats.gff import make_index

    # Use this command's own usage text; `evaluate` takes different arguments.
    p = OptionParser(batcheval.__doc__)
    p.add_option(
        "--type",
        default="CDS",
        help="list of features to extract, use comma to separate (e.g."
        "'five_prime_UTR,CDS,three_prime_UTR') [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    model_ids, gff_file, evidences_bed, fastafile = args
    feature_types = set(opts.type.split(","))

    g = make_index(gff_file)
    prefix = model_ids.rsplit(".", 1)[0]
    # Loop-invariant: every model reuses (and overwrites) the same BED file.
    cidbed = prefix + ".bed"

    # Context managers make sure both files are closed even if a lookup or
    # the evaluation raises halfway through the batch.
    with open(model_ids) as fp, open(prefix + ".scores", "w") as fwscores:
        for row in fp:
            cid = row.strip()
            if not cid:
                # Blank lines carry no model id; nothing to evaluate.
                continue

            # The first parent returned for the model supplies the query
            # range (presumably the enclosing gene -- confirm against the
            # index returned by make_index).
            parent = next(g.parents(cid, 1))
            query = "{0}:{1}-{2}".format(parent.chrom, parent.start, parent.stop)

            with open(cidbed, "w") as fw:
                for c in g.children(cid, 1):
                    if c.featuretype in feature_types:
                        fw.write(c.to_bed())

            b = evaluate(
                [cidbed, evidences_bed, fastafile, "--query={0}".format(query)])
            print("\t".join((cid, b.score)), file=fwscores)
            # Push each score to disk right away instead of waiting for the
            # buffer to fill.
            fwscores.flush()
示例#4
0
文件: maker.py 项目: bennyyu/jcvi
def batcheval(args):
    """
    %prog batcheval model.ids gff_file evidences.bed fastafile

    Get the accuracy for a list of models against evidences in the range of the
    genes. For example:

    $ %prog batcheval isoforms.ids all.gff3 proteins.bed scaffolds.fasta

    The scores for the models are written, one "id<TAB>score" line per model,
    to a file named after the ids file with its extension replaced by
    ".scores" (isoforms.scores in the example above). A scratch BED file with
    the same prefix and a ".bed" extension is overwritten for every model.
    """
    from jcvi.formats.bed import evaluate
    from jcvi.formats.gff import make_index

    # Use this command's own usage text; `evaluate` takes different arguments.
    p = OptionParser(batcheval.__doc__)
    p.add_option("--type", default="CDS",
            help="list of features to extract, use comma to separate (e.g."
            "'five_prime_UTR,CDS,three_prime_UTR') [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    model_ids, gff_file, evidences_bed, fastafile = args
    feature_types = set(opts.type.split(","))

    g = make_index(gff_file)
    prefix = model_ids.rsplit(".", 1)[0]
    # Loop-invariant: every model reuses (and overwrites) the same BED file.
    cidbed = prefix + ".bed"

    # Context managers make sure both files are closed even if a lookup or
    # the evaluation raises halfway through the batch.
    with open(model_ids) as fp, open(prefix + ".scores", "w") as fwscores:
        for row in fp:
            cid = row.strip()
            if not cid:
                # Blank lines carry no model id; nothing to evaluate.
                continue

            # next(iterator) works on Python 2.6+ and Python 3, unlike the
            # Python-2-only iterator.next() method. The first parent returned
            # supplies the query range (presumably the enclosing gene --
            # confirm against the index returned by make_index).
            parent = next(g.parents(cid, 1))
            query = "{0}:{1}-{2}".format(parent.chrom, parent.start, parent.stop)

            with open(cidbed, "w") as fw:
                for c in g.children(cid, 1):
                    if c.featuretype in feature_types:
                        fw.write(c.to_bed())

            b = evaluate([cidbed, evidences_bed, fastafile, "--query={0}".format(query)])
            # write() replaces the Python-2-only `print >> file` statement and
            # emits the same tab-separated line on either interpreter.
            fwscores.write("\t".join((cid, b.score)) + "\n")
            # Push each score to disk right away instead of waiting for the
            # buffer to fill.
            fwscores.flush()