def fracSimilar(self, other, similar_pairs):
    """Returns fraction of positions where self[i] is similar to other[i].

    similar_pairs: dict keyed by (i, j) tuples. A pair of items counts as
    similar when its tuple is present as a key; the stored values are never
    read. Use PairsFromGroups in cogent.util.misc to construct such a dict
    from a list of lists of similar residues.

    Truncates at the length of the shorter sequence.

    Returns 0.0 without doing any comparison if either sequence is empty.

    Note: the comparison function is rebuilt with for_seq on every call, so
    this may be expensive compared to building it once with for_seq and
    reusing it.
    """
    # Guard first: an empty operand short-circuits to 0.0.
    if not (self and other):
        return 0.0

    def is_similar(first, second):
        """True if the (first, second) pair is listed in similar_pairs."""
        return (first, second) in similar_pairs

    similarity = for_seq(f=is_similar, normalizer=per_shortest)
    return similarity(self, other)
(either here or in another module), but they're all general enough that putting them in SequenceI seems like a reasonable compromise. """ from __future__ import division from random import shuffle from old_cogent.util.transform import keep_chars, for_seq, per_shortest, per_longest from old_cogent.parse.record import MappedRecord from old_cogent.util.misc import Delegator, ConstrainedString, ConstrainedList, \ ConstrainedContainer, ConstraintError, DistanceFromMatrix from old_cogent.base.info import Info as InfoClass from old_cogent.base.alphabet import DnaAlphabet, RnaAlphabet, ProteinAlphabet from string import maketrans from operator import eq, ne #standard distance functions: left because generally useful frac_same = for_seq(f=eq, aggregator=sum, normalizer=per_shortest) frac_diff = for_seq(f=ne, aggregator=sum, normalizer=per_shortest) class SequenceI(Delegator): """Sequence object interface. SequenceI should be treated as an abstract class (it basically allows for implementations of mutable and immutable sequences that inherit from different built-in types). Mostly, it delegates sequence methods to that sequence's Alphabet, passing in the sequence as data. However, it will not raise an exception if you instantiate it directly. Alphabet is a synonym for Constraint. Cannot set Alphabet in sequence init directly (though it can be changed afterwards if necessary): should instead set as class data.
def test_for_seq(self):
    """for_seq should return the correct function"""

    # Pairwise item functions handed to for_seq.
    def is_eq(x, y):
        return x == y

    def is_ne(x, y):
        return x != y

    def lt_5(x, y):
        return x + y < 5

    def diff(x, y):
        return x - y

    # Custom aggregator: sum of squares of the per-position results.
    def sumsq(x):
        return sum(i * i for i in x)

    # Custom normalizers: receive the aggregate plus both input sequences.
    def long_norm(s, x, y):
        return (s + 0.0) / max(len(x), len(y))

    def times_two(s, x, y):
        return 2 * s

    s1 = [1, 2, 3, 4, 5]
    s2 = [1, 3, 2, 4, 5]
    s3 = [1, 1, 1, 1, 1]
    s4 = [5, 5, 5, 5, 5]
    s5 = [3, 3, 3, 3, 3]
    short = [1]

    check = self.assertFloatEqual

    #test behavior with default aggregator and normalizer
    default_cases = (
        (is_eq, (
            (s1, s1, 1.0),
            (s1, short, 1.0),
            (short, s1, 1.0),
            (short, s4, 0.0),
            (s4, short, 0.0),
            (s1, s2, 0.6),
        )),
        (is_ne, (
            (s1, s1, 0.0),
            (s1, short, 0.0),
            (short, s1, 0.0),
            (short, s4, 1.0),
            (s4, short, 1.0),
            (s1, s2, 0.4),
        )),
        (lt_5, (
            (s3, s3, 1.0),
            (s3, s4, 0.0),
            (s2, s3, 0.6),
        )),
        (diff, (
            (s1, s1, 0.0),
            (s4, s1, 2.0),
            (s1, s4, -2.0),
        )),
    )
    for item_func, expectations in default_cases:
        f = for_seq(item_func)
        for first, second, expected in expectations:
            check(f(first, second), expected)

    # Each row builds a fresh function from diff with the given keyword
    # options, then checks a single call.
    option_cases = (
        #test behavior with different aggregator
        ({}, s1, s5, 0),
        ({'aggregator': sum}, s1, s5, 0),
        ({'aggregator': sumsq}, s1, s5, 2.0),
        #test behavior with different normalizer
        ({'aggregator': sumsq, 'normalizer': None}, s1, s5, 10),
        ({'aggregator': sumsq}, s1, s5, 2.0),
        ({'aggregator': sumsq, 'normalizer': times_two}, s1, s5, 20),
        ({'aggregator': sumsq}, s5, short, 4),
        ({'aggregator': sumsq, 'normalizer': long_norm}, s5, short, 0.8),
    )
    for options, first, second, expected in option_cases:
        f = for_seq(diff, **options)
        check(f(first, second), expected)