示例#1
0
def test_generate_traindata():
    """Yield checks on the output of HIVAlignTools.generate_traindata.

    For each protein (train_type='pro') and then for the LTR
    (train_type='dna'), two check tuples are yielded:

    * ``ok_``: the first dimension of ``y`` is greater than 100.
    * ``eq_``: ``X`` and ``y`` have the same first dimension.

    ``ok_`` and ``eq_`` are not defined in this block; presumably they are
    the nose.tools helpers -- confirm against the file's imports.
    """
    protein_names = ('env', 'gag', 'nef', 'pol',
                     'rev', 'tat', 'vif', 'vpr',
                     'vpu')

    # (region passed to generate_traindata, train_type, label used in messages)
    cases = [(name, 'pro', name) for name in protein_names]
    cases.append(('ltr', 'dna', 'LTR'))

    for region, kind, label in cases:
        features, targets = HIVAlignTools.generate_traindata(region,
                                                             train_type=kind)
        failure_note = 'Had issue with %s' % label
        n_targets = targets.shape[0]
        yield ok_, n_targets > 100, failure_note
        yield eq_, n_targets, features.shape[0], failure_note
示例#2
0
def test_score_seq():
    """Yield one equality check per (sequence, sequence, expected score) row.

    Each yielded tuple compares the expected score against the value
    returned by HIVAlignTools.score_seq for that pair of sequences.
    """
    fixtures = (
        ('atgtag', 'atgtag', 30.0),
        ('atgtag', 'atg', 15),
        ('GTIJ', 'GTIJ', 15),
        ('GTIJAGATS', 'GTIJAGATS', 38),
    )

    for first, second, expected in fixtures:
        yield eq_, expected, HIVAlignTools.score_seq(first, second)
示例#3
0
def test_get_seq():
    """Yield checks on HIVAlignTools.get_seq for every gene/type pair.

    The pairs are every protein crossed with 'PRO' and 'DNA', followed by
    ('genome', 'DNA') and ('ltr', 'DNA').  For each pair two check tuples
    are yielded:

    * ``ok_``: more than one name was returned.
    * ``eq_``: the number of names equals the first dimension of ``seqs``.
    """
    gene_names = ['env', 'gag', 'nef', 'pol',
                  'rev', 'tat', 'vif', 'vpr',
                  'vpu']
    seq_types = ['PRO', 'DNA']

    pairs = list(product(gene_names, seq_types))
    # These two regions are only requested as DNA.
    pairs.extend([('genome', 'DNA'), ('ltr', 'DNA')])

    for gene, typ in pairs:
        names, seqs = HIVAlignTools.get_seq(gene, typ)
        failure_note = 'Had issue with %s, %s' % (gene, typ)
        n_names = len(names)
        yield ok_, n_names > 1, failure_note
        yield eq_, n_names, seqs.shape[0], failure_note
示例#4
0
def test_score_seqs():
    """Check HIVAlignTools.score_seqs against the summed per-pair scores.

    Both columns of sequences are passed through one
    HIVAlignTools.SeqTransformer instance, and the value returned by
    score_seqs for the transformed arrays must equal the sum of the
    expected scores listed for the individual pairs.
    """
    fixtures = [('atgtag', 'atgtag', 30.0),
                ('atgtag', 'atg', 15),
                ('GTIJ', 'GTIJ', 15),
                ('GTIJAGATS', 'GTIJAGATS', 38)]

    # Split the fixture rows into their three columns.
    first_column, second_column, expected_scores = zip(*fixtures)

    transformer = HIVAlignTools.SeqTransformer()
    known = transformer.transform(np.array(first_column))
    guess = transformer.transform(np.array(second_column))
    expected_total = sum(expected_scores)

    observed_total = HIVAlignTools.score_seqs(known, guess)
    eq_(expected_total, observed_total)
from Bio import SeqIO
import pandas as pd
import numpy as np
import sys
import os
sys.path.append('/home/will/PySeqUtils/')
from GeneralSeqTools import fasta_reader, fasta_writer
from HIVAlignTools import SeqTransformer, build_aligners

# <codecell>

import HIVAlignTools

# <codecell>

# Module-level call executed as soon as this cell/script runs.
# NOTE(review): presumably this prepares the aligners used by later cells;
# what it builds and whether it has side effects cannot be seen from here --
# confirm against HIVAlignTools.
HIVAlignTools.build_aligners()

# <codecell>

import shlex
from subprocess import check_call

def score_seq(known, guess, gapopen=10, gapextend=1):
    
    cmd = 'needle -asequence %(cb)s -bsequence %(seq)s -aformat score -gapopen %(go)f -gapextend %(ge)s -outfile %(out)s'
    with NamedTemporaryFile() as conb_handle:
        fasta_writer(conb_handle, [('SeqA', known)])
        conb_handle.flush()
        os.fsync(conb_handle.fileno())
        with NamedTemporaryFile() as seq_handle:
            fasta_writer(seq_handle, [('Seq1', guess)])