Exemplo n.º 1
0
 def test_replace_tags_by_motifs(self):
     """Replace the tags of the first SEQ1 motif and check nothing else moves.

     The tagged motif must be found and flagged ``correct`` both before and
     after ``dao.replace_tags_by_motifs`` runs, and SEQ1 must still exist
     with 10 correct motifs afterwards.
     """
     motifs_by_seq = {}
     with query_session() as session:
         for seq_id in ('SEQ1', 'SEQ2'):
             motifs_by_seq[seq_id] = dao.motif.find_motifs_by_seq_id(
                 session, seq_id, MOTIF_VERSION)

     # Only the first motif of SEQ1 receives new tags.
     mids_to_tags = {motifs_by_seq['SEQ1'][0].id: ['a', 'b', 'c']}
     entity = dao.motif.get_entity(MOTIF_VERSION)

     def assert_tagged_motif_is_correct():
         # Re-read the tagged motif in a fresh session, then assert on it
         # once the session has been left.
         with query_session() as session:
             found = session.query(entity).filter(
                 entity.id.in_(mids_to_tags.keys())).all()
         self.assertEqual(1, len(found))
         self.assertTrue(found[0].correct)

     assert_tagged_motif_is_correct()

     dao.replace_tags_by_motifs(mids_to_tags, MOTIF_VERSION)

     self.assertIsNotNone(dao.find_seq_by_id('SEQ1'))
     correct_by_seq = dao.find_correct_motifs_by_seq_ids(['SEQ1'],
                                                         MOTIF_VERSION)
     self.assertEqual(10, len(correct_by_seq['SEQ1']))

     assert_tagged_motif_is_correct()
def query_density(seq_id):
    """Load the motifs and the sequence record for ``seq_id``.

    Args:
        seq_id: Identifier handed to the dao lookups.

    Returns:
        A ``(motifs, seq)`` tuple: the result of
        ``dao.motif.find_motifs_by_seq_id`` called with ``with_wrong=False``
        and the result of ``dao.find_seq_by_id``.
    """
    with dao.query_session() as db_session:
        # with_wrong=False presumably leaves out motifs marked wrong;
        # confirm against dao.motif.
        found_motifs = dao.motif.find_motifs_by_seq_id(
            db_session, seq_id, MOTIF_VERSION, with_wrong=False)
        seq_record = dao.find_seq_by_id(seq_id)

    return found_motifs, seq_record
def obtain_value(seq_id):
    """Load the motifs, nsites and sequence record for ``seq_id``.

    Args:
        seq_id: Identifier handed to the dao lookups.

    Returns:
        A ``(motifs, nsites, seq)`` tuple built from
        ``dao.motif.find_motifs_by_seq_id`` (called with
        ``with_wrong=False``), ``dao.nsite.find_nsites_by_seq_id`` and
        ``dao.find_seq_by_id``.
    """
    with dao.query_session() as db_session:
        # with_wrong=False presumably leaves out motifs marked wrong;
        # confirm against dao.motif.
        found_motifs = dao.motif.find_motifs_by_seq_id(
            db_session, seq_id, MOTIF_VERSION, with_wrong=False)
        found_nsites = dao.nsite.find_nsites_by_seq_id(db_session, seq_id)
        seq_record = dao.find_seq_by_id(seq_id)

    return found_motifs, found_nsites, seq_record
Exemplo n.º 4
0
 def test_add_seq(self):
     """SEQ1 exists, has 10 correct motifs and stores 'SEQABCDEFG'."""
     seq_record = dao.find_seq_by_id('SEQ1')
     self.assertIsNotNone(seq_record)

     correct_by_seq = dao.find_correct_motifs_by_seq_ids(['SEQ1'],
                                                         MOTIF_VERSION)
     self.assertEqual(10, len(correct_by_seq['SEQ1']))

     self.assertEqual('SEQABCDEFG', seq_record.seq)
def read_in_scratch_result(result):
    """Validate one scratch result and store it on the matching db sequence.

    The ``ss``, ``ss8`` and ``acc`` attributes of ``result`` are each read
    as a single FASTA record with ``SeqIO.read``; ``acc20`` is read with
    ``read_in_acc20``. The records are only persisted (through
    ``dao.update_seq_for_scratch_result``) when all four share the same id
    and sequence length, and that length equals the length of the sequence
    stored in the db under the same id. Any failed check is logged and the
    function returns without saving.

    Args:
        result: Object exposing ``ss``, ``ss8``, ``acc`` and ``acc20``
            (presumably file paths or handles of the scratch output;
            confirm against the caller).

    Returns:
        None in every case.
    """
    ss = SeqIO.read(result.ss, 'fasta')
    ss8 = SeqIO.read(result.ss8, 'fasta')
    acc = SeqIO.read(result.acc, 'fasta')
    acc20 = read_in_acc20(result.acc20)
    if acc20 is None:
        return

    # Every other record is compared against the ``ss`` record.
    others = (ss8, acc, acc20)
    if any(ss.id != other.id for other in others):
        logging.error(
            "The result for seq {} is invalid, the seq id is different".format(
                ss.id))
        return
    if any(len(ss.seq) != len(other.seq) for other in others):
        logging.error(
            "The result for seq {} is invalid, the seq length is different".
            format(ss.id))
        return

    try:
        seq_entity = dao.find_seq_by_id(ss.id)
    except Exception:
        # Best-effort boundary: keep going with the next result, but record
        # the traceback so a db failure is not mistaken for a missing row.
        logging.exception(
            "Failed to save result for seq {}, can not find the record in the db"
            .format(ss.id))
        return
    if seq_entity is None:
        logging.error(
            "Failed to save result for seq {}, can not find the record in the db"
            .format(ss.id))
        return
    if len(ss.seq) != len(seq_entity.seq):
        logging.error(
            "The result for seq {} is invalid, the seq length is different with db"
            .format(ss.id))
        return

    logging.info("Update scratch result for seq " + ss.id)
    # acc20.seq is joined with single spaces before it is stored.
    dao.update_seq_for_scratch_result(ss.id, str(ss.seq), str(ss8.seq),
                                      str(acc.seq), ' '.join(acc20.seq))
Exemplo n.º 6
0
 def test_unmark_wrong_with_other_wrong(self):
     """Unmark one of two wrong motifs: the correct count goes 2 -> 0 -> 1."""

     # When unmarking a wrong motif, motifs that are already marked as
     # wrong must be taken into account and must not disturb the normal
     # calculation.
     def correct_motifs():
         return dao.find_correct_motifs_by_seq_ids(['SEQ4'],
                                                   MOTIF_VERSION)['SEQ4']

     set_up_seq('SEQ4', 'SEQ4ABCDABCDABCD', start=10, count=2, step=10)
     seq4 = dao.find_seq_by_id('SEQ4')

     motifs = correct_motifs()
     self.assertEqual(2, len(motifs))

     # Mark both motifs wrong, second one first.
     mark_wrong(seq4.id, motifs[1].id)
     mark_wrong(seq4.id, motifs[0].id)
     self.assertEqual(0, len(correct_motifs()))

     # Restoring the first motif must bring back exactly one correct motif.
     unmark_wrong(seq4.id, motifs[0].id)
     self.assertEqual(1, len(correct_motifs()))
Exemplo n.º 7
0
def obtain_value(seq_id):
    """Load the nsites and the sequence record for ``seq_id``.

    Args:
        seq_id: Identifier handed to the dao lookups.

    Returns:
        A ``(nsites, seq)`` tuple built from
        ``dao.nsite.find_nsites_by_seq_id`` and ``dao.find_seq_by_id``.
    """
    with dao.query_session() as db_session:
        found_nsites = dao.nsite.find_nsites_by_seq_id(db_session, seq_id)
        seq_record = dao.find_seq_by_id(seq_id)

    return found_nsites, seq_record
import dao
from tools import motifs as motif_tool, pssm_matrix

# Version passed to dao.motif.get_entity() to pick the motif entity class
# queried by this script.
MOTIF_VERSION = 2

with dao.session_scope() as session:
    matrix = pssm_matrix.calc_pssm_matrix(dao.find_baseline_motifs())

    logging.info(matrix.pssm)

    cls = dao.motif.get_entity(MOTIF_VERSION)
    motifs = session.query(cls).filter(cls.manually_add).all()

    results = []
    for m in motifs:
        seq = dao.find_seq_by_id(m.seq_id)
        motif_seq = seq.seq[m.offset:m.offset + 24]
        score = motif_tool.calc_pssm_score(motif_seq[:16], matrix)
        m.score = score
        results.append({
            'seq_id': m.seq_id,
            'offset': m.offset,
            'seq': motif_seq,
            'score': score
        })

    logging.info(str.format("Total manually records {}", len(results)))
    results.sort(key=lambda k: k['score'])
    for r in results:
        logging.info(
            str.format(