def test_embl_to_gb(self):
    # Round trip: EMBL file -> object -> GenBank text -> object -> EMBL
    # text, then require the final EMBL text to equal the original file.
    #
    # Rationale kept from the original author: EMBL records can hold more
    # information than GenBank ones (e.g. more than one date, the EMBL
    # class, DOI cross references), so an arbitrary EMBL record cannot be
    # expected to survive a detour through GenBank unchanged.
    # NOTE(review): presumably the "single_rna_simple" fixture avoids those
    # EMBL-only fields, which is why equality is asserted here -- confirm.
    source_fp = self.single_rna_simple_fp
    embl_record = DNA.read(source_fp, format="embl")

    # Serialise the EMBL-derived record as GenBank into an in-memory
    # buffer, rewind it, and parse it back as a GenBank record.
    with io.StringIO() as genbank_buffer:
        DNA.write(embl_record, format="genbank", file=genbank_buffer)
        genbank_buffer.seek(0)
        genbank_record = DNA.read(genbank_buffer, format="genbank")

    # Serialise the GenBank-derived record as EMBL and keep the raw text.
    with io.StringIO() as embl_buffer:
        DNA.write(genbank_record, format="embl", file=embl_buffer)
        obs = embl_buffer.getvalue()

    # The expected text is the untouched content of the source EMBL file.
    with open(source_fp) as expected_file:
        exp = expected_file.read()

    self.assertEqual(exp, obs)
def test_gb_to_embl(self):
    # Round trip: GenBank file -> object -> EMBL text -> object -> GenBank
    # text, then require the final GenBank text to equal the original file.
    source_fp = self.genbank_fp
    genbank_record = DNA.read(source_fp, format="genbank")

    # Serialise as EMBL into an in-memory buffer, rewind it, and parse it
    # back as an EMBL record.
    # Original author's caveat: EMBL can't deal with the genbank version
    # (ie M14399.1  GI:145229).
    with io.StringIO() as embl_buffer:
        DNA.write(genbank_record, format="embl", file=embl_buffer)
        embl_buffer.seek(0)
        embl_record = DNA.read(embl_buffer, format="embl")

    # Serialise the EMBL-derived record as GenBank and keep the raw text.
    with io.StringIO() as genbank_buffer:
        DNA.write(embl_record, format="genbank", file=genbank_buffer)
        obs = genbank_buffer.getvalue()

    # The expected text is the untouched content of the source GenBank file.
    with open(source_fp) as expected_file:
        exp = expected_file.read()

    self.assertEqual(exp, obs)
from __future__ import division
import os
import skbio
from skbio import DNA

# Locate the FASTA file inside the local LTDE checkout under the user's home.
mydir = os.path.expanduser("~/GitHub/LTDE/")
# mydir ends with a separator, so join() yields the same string as plain
# concatenation would.
fa_path = os.path.join(mydir, 'data/align/reseq_sanger.fasta')

# NOTE(review): `fa` is not used anywhere in the visible script.
fa = skbio.io.read(fa_path, format='fasta')


def _read_record(seq_num):
    # Read the seq_num-th record of the FASTA file as a DNA sequence.
    return DNA.read(fa_path, seq_num=seq_num)


# Records 1-2 belong to KBS0710 and records 3-4 to KBS0721. The "1492R"
# records are stored reverse-complemented.
# NOTE(review): presumably 8F / 1492R name the forward and reverse
# sequencing primers -- confirm.
KBS0710_8F = _read_record(1)
KBS0710_1492R = _read_record(2).complement(reverse=True)

KBS0721_8F = _read_record(3)
KBS0721_1492R = _read_record(4).complement(reverse=True)

# Show both KBS0721 reads.
print(KBS0721_8F)
print(KBS0721_1492R)
from skbio import DNA
from skbio.alignment import global_pairwise_align_nucleotide

# Two sequences read from files relative to the current working directory.
s1 = DNA.read("data/seq1")
s2 = DNA.read("data/seq2")

# Fixed query sequence that both file sequences are compared against.
query = DNA("TTTTCTTGTTGATTCTGGTCCAGAGTAATCGCTTGAGTGTTG")


def pairwise_similarity(seq, query):
    """Return the similarity of ``seq`` and ``query`` after global alignment.

    The two sequences are aligned with
    ``global_pairwise_align_nucleotide`` and the result of calling
    ``fraction_same`` on element 0 of the alignment, with element 1 as its
    argument, is returned.

    Parameters
    ----------
    seq : sequence accepted by ``global_pairwise_align_nucleotide``
        First sequence to align.
    query : sequence accepted by ``global_pairwise_align_nucleotide``
        Second sequence to align.

    Returns
    -------
    Whatever ``fraction_same`` returns for the two aligned sequences.
    """
    alignment = global_pairwise_align_nucleotide(seq, query)
    # NOTE(review): this indexing assumes the alignment result exposes the
    # two aligned sequences at positions 0 and 1 -- confirm against the
    # installed scikit-bio release, as the aligner's return type has
    # changed between versions.
    return alignment[0].fraction_same(alignment[1])


# Parenthesised single-argument print: valid as a function call on
# Python 3 and still prints the same text on Python 2, where the bare
# print statement used previously is the only form that parses.
print("seq1: %s\nseq2: %s" % (s1, s2))
print("seq1-query: %s" % pairwise_similarity(s1, query))
print("seq2-query: %s" % pairwise_similarity(s2, query))