Пример #1
0
def score_sequences(_pair, args):
    """Score one pair of aligned records and append the result to a file.

    The final score blends two components:

    * 70% substitution-matrix score: the observed BLOSUM62/gap score of the
      pairwise alignment, divided by each sequence's self-alignment score
      (its best possible score), averaged over the two sequences.
    * 30% secondary-structure score: the mean agreement of the coil, helix
      and sheet probabilities at every ``indx`` present in both tables.

    Args:
        _pair: Two-item iterable ``(id1, id2)`` with the ids of the records
            to compare.
        args: Three-item iterable ``(alb_obj, psi_pred_files, outfile)``.
            ``alb_obj`` is copied and the copy is reduced to the two
            records; ``psi_pred_files`` maps a record id to a table with
            ``indx``, ``coil_prob``, ``helix_prob`` and ``sheet_prob``
            columns (presumably pandas DataFrames -- confirm against the
            caller); ``outfile`` is the path that is appended to.

    Returns:
        None. A line break followed by ``id1,id2,final_score`` is appended
        to ``outfile`` while the module-level ``lock`` is held.
    """
    id1, id2 = _pair
    alb_obj, psi_pred_files, outfile = args

    # Work on a copy so the caller's alignment object is left untouched.
    id_regex = "^%s$|^%s$" % (id1, id2)
    alb_copy = Alb.make_copy(alb_obj)
    Alb.pull_records(alb_copy, id_regex)
    seq1, seq2 = alb_copy.records()

    observed_score = 0
    seq1_best = 0
    seq2_best = 0
    # Starting with "-" means a gap in the very first column is charged as
    # a gap extension rather than a gap opening.
    prev_aa1 = "-"
    prev_aa2 = "-"

    for aa_pos in range(alb_copy.lengths()[0]):
        aa1 = seq1.seq[aa_pos]
        aa2 = seq2.seq[aa_pos]

        # Best possible score: every residue aligned against itself.
        if aa1 != "-":
            seq1_best += BLOSUM62[aa1, aa1]
        if aa2 != "-":
            seq2_best += BLOSUM62[aa2, aa2]

        if aa1 == "-" or aa2 == "-":
            # A gap column directly after another gap column extends the
            # gap; otherwise it opens a new one.
            if prev_aa1 == "-" or prev_aa2 == "-":
                observed_score += gap_extend
            else:
                observed_score += gap_open
        else:
            observed_score += BLOSUM62[aa1, aa2]
        prev_aa1 = str(aa1)
        prev_aa2 = str(aa2)

    # Normalise the observed score against EACH sequence's own best score and
    # average the two (the second term previously reused seq1_best, leaving
    # seq2_best unused).
    subs_mat_score = ((observed_score / seq1_best) + (observed_score / seq2_best)) / 2

    # PSI PRED comparison
    ss_table1 = psi_pred_files[id1]
    ss_table2 = psi_pred_files[id2]
    num_gaps = 0
    ss_score = 0
    for row1 in ss_table1.itertuples():
        matches = ss_table2["indx"] == row1.indx
        if matches.any():
            row2 = ss_table2.loc[matches]
            row_score = 0
            row_score += 1 - abs(float(row1.coil_prob) - float(row2.coil_prob))
            row_score += 1 - abs(float(row1.helix_prob) - float(row2.helix_prob))
            row_score += 1 - abs(float(row1.sheet_prob) - float(row2.sheet_prob))
            ss_score += row_score / 3
        else:
            # Position exists for id1 only; it still counts toward the
            # normalising length below.
            num_gaps += 1

    # Rows of the id2 table plus the id1 rows that had no partner in id2.
    align_len = len(ss_table2) + num_gaps
    ss_score /= align_len
    final_score = (ss_score * 0.3) + (subs_mat_score * 0.7)

    # Serialise appends so concurrent callers do not interleave their lines.
    with lock:
        with open(outfile, "a") as _ofile:
            _ofile.write("\n%s,%s,%s" % (id1, id2, final_score))
Пример #2
0
def score_sequences(seq_pair):
    """Return the BLOSUM45/gap score of two records aligned within the full set.

    The two records in ``seq_pair.records`` are swapped into a copy of the
    module-level ``seqbuddy`` collection (any records whose ids match theirs
    are removed first), the whole collection is aligned with
    ``Alb.generate_msa`` using ``tool="mafft"`` and ``" --globalpair"``,
    optionally trimmed with ``Alb.trimal`` unless ``in_args.no_msa_trim`` is
    set, and the pair is then pulled back out of the alignment and scored
    column by column.

    Returns:
        The summed score: ``BLOSUM45[a, b]`` for residue/residue columns,
        ``gap_open`` for a gap column that follows a gap-free column and
        ``gap_extend`` for a gap column that follows another gap column.
    """
    first_rec, second_rec = seq_pair.records
    pair_pattern = "^%s$|^%s$" % (first_rec.id, second_rec.id)

    # Replace any same-id records in the background set with this pair.
    background = Sb.make_copy(seqbuddy)
    Sb.delete_records(background, pair_pattern)
    combined = Sb.SeqBuddy(background.records + [first_rec, second_rec], out_format="gb", alpha=background.alpha)

    msa = Alb.generate_msa(combined, tool="mafft", params=" --globalpair", quiet=True)
    if not in_args.no_msa_trim:
        msa = Alb.trimal(msa, threshold="gappyout")
    msa = Alb.pull_records(msa, pair_pattern)

    aligned_a, aligned_b = msa.records()
    total = 0
    # Seeding with "-" makes a gap in the first column count as an extension.
    last_a = "-"
    last_b = "-"

    for column in range(msa.lengths()[0]):
        res_a = aligned_a.seq[column]
        res_b = aligned_b.seq[column]

        if res_a != "-" and res_b != "-":
            total += BLOSUM45[res_a, res_b]
        elif "-" in (last_a, last_b):
            total += gap_extend
        else:
            total += gap_open

        last_a, last_b = str(res_a), str(res_b)

    return total
Пример #3
0
def test_pull_records(key, next_hash, alb_resources, hf):
    """Check that pulling records by the α/β regex yields the expected hash."""
    msa = alb_resources.get_one(key)
    Alb.pull_records(msa, "α[1-5]$|β[A-M]")
    observed_hash = hf.buddy2hash(msa)
    # The message expression only runs when the assertion fails, so the
    # alignment is written out solely for failing cases.
    # NOTE(review): the format arguments put the hash before the path
    # separator ("error_files<hash><sep>") -- confirm this order is intended.
    assert observed_hash == next_hash, msa.write("error_files%s%s" % (next_hash, os.path.sep))