def test_generate_traindata():
    """Yield sanity checks on the output of HIVAlignTools.generate_traindata.

    For every protein name (requested with train_type='pro') and for 'ltr'
    (requested with train_type='dna') two checks are yielded:

    * ``y`` has more than 100 entries along its first axis;
    * ``X`` and ``y`` have the same length along their first axis.
    """
    # Each case is (name passed to generate_traindata, train_type, label that
    # appears in the failure message).
    cases = [
        (name, 'pro', name)
        for name in ('env', 'gag', 'nef', 'pol', 'rev',
                     'tat', 'vif', 'vpr', 'vpu')
    ]
    # The LTR case is requested in lower case but reported in upper case.
    cases.append(('ltr', 'dna', 'LTR'))

    for name, train_type, label in cases:
        X, y = HIVAlignTools.generate_traindata(name, train_type=train_type)
        message = 'Had issue with %s' % label
        n_targets = y.shape[0]
        yield ok_, n_targets > 100, message
        yield eq_, n_targets, X.shape[0], message
def test_score_seq():
    """Yield one equality check per sequence pair scored by score_seq.

    Each table row is (first sequence, second sequence, expected score); the
    expected score is compared against HIVAlignTools.score_seq(first, second).
    """
    expectations = [
        ('atgtag', 'atgtag', 30.0),
        ('atgtag', 'atg', 15),
        ('GTIJ', 'GTIJ', 15),
        ('GTIJAGATS', 'GTIJAGATS', 38),
    ]
    for first, second, expected in expectations:
        yield eq_, expected, HIVAlignTools.score_seq(first, second)
def test_get_seq():
    """Yield sanity checks on HIVAlignTools.get_seq for each gene/type pair.

    Every protein name is requested as both 'PRO' and 'DNA'; 'genome' and
    'ltr' are requested as 'DNA' only.  For each pair two checks are yielded:

    * more than one name is returned;
    * the number of names equals the first dimension of the returned
      sequence array.
    """
    prots = ['env', 'gag', 'nef', 'pol', 'rev', 'tat', 'vif', 'vpr', 'vpu']

    # Protein-by-type combinations first, then the DNA-only entries.
    wanted = list(product(prots, ['PRO', 'DNA']))
    wanted.extend([('genome', 'DNA'), ('ltr', 'DNA')])

    for gene, typ in wanted:
        names, seqs = HIVAlignTools.get_seq(gene, typ)
        message = 'Had issue with %s, %s' % (gene, typ)
        n_names = len(names)
        yield ok_, n_names > 1, message
        yield eq_, n_names, seqs.shape[0], message
def test_score_seqs():
    """Check that score_seqs on a batch equals the sum of the per-pair scores.

    Both columns of sequences are passed through a single
    HIVAlignTools.SeqTransformer instance before being handed to
    HIVAlignTools.score_seqs; the result must equal the sum of the expected
    per-pair scores listed in the table.
    """
    table = [
        ('atgtag', 'atgtag', 30.0),
        ('atgtag', 'atg', 15),
        ('GTIJ', 'GTIJ', 15),
        ('GTIJAGATS', 'GTIJAGATS', 38),
    ]
    # Split the rows into their three columns.
    known_seqs, guess_seqs, pair_scores = zip(*table)

    transformer = HIVAlignTools.SeqTransformer()
    known = transformer.transform(np.array(known_seqs))
    guess = transformer.transform(np.array(guess_seqs))

    expected_total = sum(pair_scores)
    eq_(expected_total, HIVAlignTools.score_seqs(known, guess))
from Bio import SeqIO import pandas as pd import numpy as np import sys import os sys.path.append('/home/will/PySeqUtils/') from GeneralSeqTools import fasta_reader, fasta_writer from HIVAlignTools import SeqTransformer, build_aligners # <codecell> import HIVAlignTools # <codecell> HIVAlignTools.build_aligners() # <codecell> import shlex from subprocess import check_call def score_seq(known, guess, gapopen=10, gapextend=1): cmd = 'needle -asequence %(cb)s -bsequence %(seq)s -aformat score -gapopen %(go)f -gapextend %(ge)s -outfile %(out)s' with NamedTemporaryFile() as conb_handle: fasta_writer(conb_handle, [('SeqA', known)]) conb_handle.flush() os.fsync(conb_handle.fileno()) with NamedTemporaryFile() as seq_handle: fasta_writer(seq_handle, [('Seq1', guess)])