示例#1
0
文件: score.py 项目: pombredanne/swga
def score_set(
        set_id,
        bg_dist_mean,
        primers,
        chr_ends,
        score_fun,
        score_expression,
        max_fg_bind_dist,
        bg_genome_len=None,
        interactive=False):
    """Score a primer set and, unless it is rejected, store it in the database.

    Args:
        set_id: Identifier to store the set under. Replaced by a freshly
            chosen negative id when ``interactive`` is true and the user
            agrees to save the set.
        bg_dist_mean: Mean distance between background binding sites. If
            falsy, it is derived from ``bg_genome_len`` and the primers'
            ``bg_freq`` attributes.
        primers: Iterable of primer objects exposing ``bg_freq``.
        chr_ends: Passed through to
            ``swga.locate.linearize_binding_sites``.
        score_fun: Callable invoked with the keyword arguments
            ``primer_set``, ``primer_locs``, ``max_dist`` and
            ``bg_dist_mean``; must return ``(score, variables)`` where
            ``variables`` is a mapping of extra statistics.
        score_expression: Description of the scoring function, stored with
            the set as ``scoring_fn``.
        max_fg_bind_dist: Largest tolerated value of ``max_dist`` for
            non-interactive sets.
        bg_genome_len: Background genome length; only used when
            ``bg_dist_mean`` is falsy.
        interactive: True when the set was supplied by the user. The
            distance filter is skipped, the statistics are printed and the
            user is asked whether the set should be saved.

    Returns:
        A ``(set_added, max_dist)`` tuple. ``set_added`` is False when the
        set failed the distance filter, the user declined to save it, or
        ``swga.database.add_set`` returned None.
    """
    binding_locations = swga.locate.linearize_binding_sites(primers, chr_ends)
    max_dist = max(swga.score.seq_diff(binding_locations))

    # Abort now if it's not passing filter (and it's not a user-supplied set)
    if not interactive and max_dist > max_fg_bind_dist:
        return False, max_dist

    if not bg_dist_mean and not bg_genome_len:
        # NOTE(review): swga.error presumably aborts the program; its
        # definition is not visible here -- confirm.
        swga.error("Neither background length nor ratio were provided, "
                   "cannot calculate bg_dist_mean")
    elif not bg_dist_mean:
        bg_dist_mean = float(bg_genome_len)/sum(p.bg_freq for p in primers)

    set_score, variables = score_fun(
        primer_set=primers,
        primer_locs=binding_locations,
        max_dist=max_dist,
        bg_dist_mean=bg_dist_mean)

    # If it's user-supplied, they have the option of not adding it to db
    if interactive:
        # Build the merged mapping with update() rather than concatenating
        # .items() results: dict views cannot be added on Python 3. As
        # before, entries in `variables` win over the two fixed keys.
        set_dict = {'score': set_score, 'scoring_fn': score_expression}
        set_dict.update(variables)
        swga.message("Set statistics:\n - " + "\n - ".join(
            fmtkv(k, v) for k, v in set_dict.items()))

        if not click.confirm("Add set to database?"):
            # Nothing was stored; previously this path fell through to the
            # final return with `set_added` never assigned.
            return False, max_dist

        # User-provided sets have negative numbers, so we find the smallest
        # existing id and decrement by 1. The aggregate is None when the
        # table is empty, and clamping at 0 keeps the new id negative even
        # if every existing id is positive.
        lowest_id = Set.select(fn.Min(Set._id)).scalar()
        if lowest_id is None:
            lowest_id = 0
        set_id = min(lowest_id, 0) - 1

    stored = swga.database.add_set(
        _id=set_id,
        primers=primers,
        score=set_score,
        scoring_fn=score_expression,
        **variables)
    set_added = stored is not None

    if interactive:
        if set_added:
            swga.message("Set {} added successfully.".format(set_id))
        else:
            swga.message("That primer set already exists.")
    return set_added, max_dist
示例#2
0
def main(argv, cfg_file):
    """Entry point for set finding: clear old sets, search, then score.

    Both the 'find_sets' and the 'score' command definitions are parsed
    from the same ``argv`` so that the score expression is available when
    the discovered sets are scored.

    Args:
        argv: Command-line arguments handed to both ``Command`` parsers.
        cfg_file: Configuration file passed to both ``Command`` objects.
    """
    find_cmd = Command('find_sets', cfg_file=cfg_file)
    scoring_cmd = Command('score', cfg_file=cfg_file)
    find_cmd.parse_args(argv)
    scoring_cmd.parse_args(argv)

    init_db(find_cmd.primer_db)

    # Previously stored sets must go first because of uniqueness
    # constraints on the sets table.
    stale_sets = Set.select()
    has_stale_sets = stale_sets.count() > 0
    if has_stale_sets and not find_cmd.force:
        # abort=True makes click stop the program if the user says no.
        click.confirm("Remove all previously-found sets?", abort=True)
    if has_stale_sets:
        for stale in progress.bar(stale_sets,
                                  expected_size=stale_sets.count()):
            stale.primers.clear()
            stale.delete_instance()

    make_graph(find_cmd.max_dimer_bp, graph_fname)

    swga.message("Now finding sets. If nothing appears, try relaxing your parameters.")

    # One worker (or fewer) means the in-process finder; anything else
    # goes through the multiprocess variant.
    single_process = find_cmd.workers <= 1
    if not single_process:
        found_sets = setfinder.mp_find_sets(
            nprocesses=find_cmd.workers,
            graph_fp=graph_fname,
            min_bg_bind_dist=find_cmd.min_bg_bind_dist,
            min_size=find_cmd.min_size,
            max_size=find_cmd.max_size,
            bg_genome_len=find_cmd.bg_genome_len)
    else:
        found_sets = setfinder.find_sets(
            find_cmd.min_bg_bind_dist,
            find_cmd.min_size,
            find_cmd.max_size,
            find_cmd.bg_genome_len,
            graph_fp=graph_fname)

    score_sets(
        found_sets,
        find_cmd.fg_genome_fp,
        scoring_cmd.score_expression,
        find_cmd.max_fg_bind_dist,
        find_cmd.max_sets)
示例#3
0
def summary(primer_db, fg_length, bg_length):
    """Print a report on the primers and sets stored in a primer database.

    Args:
        primer_db: Location of the primer database; passed to
            ``swga.database.init_db`` and shown (as an absolute path) at the
            end of the report.
        fg_length: Foreground genome length, used as the divisor for the
            average foreground binding ratio.
        bg_length: Background genome length, used as the divisor for the
            average background binding ratio.

    The report is written with ``puts``; nothing is returned.

    NOTE: the message templates below are filled in with
    ``.format(**locals())``, so the names of the local variables are part of
    the behavior -- renaming a local requires renaming its placeholder too.
    """

    # Open the database and make sure the tables exist (without dropping
    # anything that is already there).
    db = swga.database.init_db(primer_db)
    db.connect()
    swga.database.create_tables(drop=False)

    # Single aggregate query over all primers.
    avg_fg_bind, avg_bg_bind, nprimers = (
        Primer
        .select(fn.Avg(Primer.fg_freq),
                fn.Avg(Primer.bg_freq),
                fn.Count(Primer.seq))
        .scalar(as_tuple=True))

    # The averages come back as None when they could not be computed.
    # NOTE(review): this assumes swga.error returns (or itself raises) an
    # exception object; its definition is not visible here -- confirm.
    if (avg_fg_bind is None) or (avg_bg_bind is None):
        raise swga.error(
            "Could not calculate summary statistics; database may be corrupt")

    fg_bind_ratio = avg_fg_bind / float(fg_length)
    bg_bind_ratio = avg_bg_bind / float(bg_length)
    nactive = Primer.select().where(Primer.active==True).count()

    # Melting temperature statistics are restricted to active primers.
    min_tm, max_tm, avg_tm = (
        Primer
        .select(fn.Min(Primer.tm),
                fn.Max(Primer.tm),
                fn.Avg(Primer.tm))
        .where(Primer.active==True)
        .scalar(as_tuple=True))

    nsets = Set.select(fn.Count(Set._id)).scalar()

    # The bs_* variables only exist when at least one set is stored; they are
    # only referenced by the set message template chosen in that same case.
    if nsets > 0:
        # First row when ordering by score is reported as the "best" set.
        bs = Set.select().order_by(Set.score).limit(1).get()
        bs_primers = ", ".join(swga.database.get_primers_for_set(bs._id)).strip()
        best_set = bs._id
        bs_size = bs.set_size
        bs_score = bs.score
        # NOTE(review): reads the model instance's internal '_data' dict to
        # list the remaining fields -- presumably an ORM implementation
        # detail; verify it exists in the ORM version in use.
        bs_stats = "- "+"\n - ".join(
            fmtkv(k, v)
            for k, v in bs.__dict__['_data'].items()
            if k not in ["_id", "pids", "score"]
        )

    version_header = (
        "---------------------\n"
        "==== SWGA v{version} ====\n"
        "---------------------\n"
        .format(version=swga.__version__)
    )

    # Main report template; every placeholder is resolved from a local
    # variable of the same name further down.
    summary_msg = """
    {version_header}

    PRIMER SUMMARY
    ---------------
    There are {nprimers} primers in the database.

    {nactive} are marked as active (i.e., they passed filter steps and will be used to find sets of compatible primers.) {ifzero_primers_msg}

    The average number of foreground genome binding sites is {avg_fg_bind:.0f}.
       (avg binding / genome_length = {fg_bind_ratio:05f})
    The average number of background genome binding sites is {avg_bg_bind:.0f}.
       (avg binding / genome_length = {bg_bind_ratio:05f})

    {melting_tmp_msg}


    SETS SUMMARY
    ---------------
    There are {nsets} sets in the database.
    {set_msg}---------------

    Report generated from {primer_db}
"""

    # Hint shown only when no primer is active (empty string otherwise).
    ifzero_primers_msg = colored.green(
        "Run `swga filter` to identify primers to use."
        if nactive == 0 else "")
    # Use the melting temperature sentence only when there are active primers
    # and both extremes are truthy.
    melting_tmp_msg = (
        """The melting temp of the primers ranges between {min_tm:.2f}C and {max_tm:.2f}C with an average of {avg_tm:.2f}C."""
        if nactive > 0 and min_tm and max_tm else
        "No melting temps have been calculated yet.")
    ifzero_sets_msg = colored.green(
        "Run `swga find_sets` after identifying valid primers to begin collecting sets.\n")

    # With no sets the hint above is used instead; it has no placeholders, so
    # the undefined bs_* names are never looked up.
    set_msg = ("""
    The best scoring set is #{best_set}, with {bs_size} primers and a score of {bs_score:03f}.\nVarious statistics:
    {bs_stats}
The primers in Set {best_set} are:
    {bs_primers}
    """ if nsets > 0 else ifzero_sets_msg)



    # The counts are rebound to colored values only now, after every numeric
    # comparison on them has been made. The nested messages are formatted
    # before summary_msg so that it embeds their final text.
    primer_db = os.path.abspath(primer_db)
    nprimers = colored.blue(nprimers, bold=True)
    nactive = colored.blue(nactive, bold=True)
    nsets = colored.blue(nsets, bold=True)
    set_msg = set_msg.format(**locals())
    melting_tmp_msg = melting_tmp_msg.format(**locals())
    version_header = colored.green(version_header)
    summary_msg = summary_msg.format(**locals())

    # Emit the report indented by two and constrained with max_width(..., 80).
    with indent(2):
        puts(max_width(summary_msg, 80))