def test_state_priors_weights_array_pseudocounts(self):
    """Check weighted state priors when pseudocounts are given as an array.

    A random integer vector with one entry per state is passed as
    ``state_prior_pseudocounts``. Expected counts are the summed weights of
    each sequence's first state plus that vector; expected frequencies are
    those counts divided by their total.

    Yields ``(check_function, found, expected)`` triples for the shape of
    the unnormalized priors, the unnormalized priors themselves, and the
    normalized priors.
    """
    pseudocounts = numpy.random.randint(0, high=255, size=(self.num_states, ))

    # Accumulate each sequence's weight onto its starting state.
    start_totals = numpy.zeros(self.num_states)
    for seq, weight in zip(self.test_seqs, self.test_weights):
        start_totals[seq[0]] += weight

    expected_counts = start_totals + pseudocounts
    expected_freqs = (1.0 * expected_counts) / expected_counts.sum()

    # Both calls differ only in ``normalize``.
    shared = dict(weights=self.test_weights,
                  state_prior_pseudocounts=pseudocounts)
    raw_priors, _ = build_hmm_tables(
        self.num_states, self.test_seqs, normalize=False, **shared)
    norm_priors, _ = build_hmm_tables(
        self.num_states, self.test_seqs, normalize=True, **shared)

    yield check_tuple_equal, raw_priors.shape, (self.num_states, )
    yield check_array_equal, raw_priors, expected_counts
    yield check_array_equal, norm_priors, expected_freqs
def test_state_priors_weights_int_pseudocounts(self):
    """Check weighted state priors when pseudocounts are given as an integer.

    For each scalar pseudocount in ``(1, 2, 3)``, expected counts are the
    summed weights of each sequence's first state plus the scalar; expected
    frequencies are those counts divided by their total.

    Yields ``(check_function, found, expected)`` triples for the shape of
    the unnormalized priors, the unnormalized priors themselves, and the
    normalized priors, once per pseudocount value.
    """
    # Accumulate each sequence's weight onto its starting state.
    start_totals = numpy.zeros(self.num_states)
    for seq, weight in zip(self.test_seqs, self.test_weights):
        start_totals[seq[0]] += weight

    for pseudocount in (1, 2, 3):
        expected_counts = start_totals + pseudocount
        expected_freqs = (1.0 * expected_counts) / expected_counts.sum()

        # Both calls differ only in ``normalize``.
        shared = dict(weights=self.test_weights,
                      state_prior_pseudocounts=pseudocount)
        raw_priors, _ = build_hmm_tables(
            self.num_states, self.test_seqs, normalize=False, **shared)
        norm_priors, _ = build_hmm_tables(
            self.num_states, self.test_seqs, normalize=True, **shared)

        yield check_tuple_equal, raw_priors.shape, (self.num_states, )
        yield check_array_equal, raw_priors, expected_counts
        yield check_array_equal, norm_priors, expected_freqs
def test_transitions_weights_array_pseudocounts(self):
    """Check weighted transition tables when pseudocounts are given as an array.

    A random square integer matrix is passed as ``transition_pseudocounts``.
    Expected counts are the weight-scaled matrices in ``self.mats`` summed
    together, plus that matrix; expected frequencies divide each row of the
    counts by its row sum.

    Yields ``(check_function, found, expected)`` triples for the shape of
    the unnormalized transitions, the unnormalized transitions themselves,
    and the normalized transitions.
    """
    table_shape = (self.num_states, self.num_states)
    pseudocounts = numpy.random.randint(0, high=255, size=table_shape)

    # Kept as an in-place accumulation starting from the integer 0.
    expected_counts = 0
    for matrix, weight in zip(self.mats, self.test_weights):
        expected_counts += weight * matrix
    expected_counts += pseudocounts

    # Transposing lets the per-row sums broadcast along the last axis.
    row_totals = expected_counts.sum(1)
    expected_freqs = (1.0 * expected_counts.T / row_totals).T

    # Both calls differ only in ``normalize``.
    shared = dict(weights=self.test_weights,
                  transition_pseudocounts=pseudocounts)
    _, raw_transitions = build_hmm_tables(
        self.num_states, self.test_seqs, normalize=False, **shared)
    _, norm_transitions = build_hmm_tables(
        self.num_states, self.test_seqs, normalize=True, **shared)

    yield (check_tuple_equal, raw_transitions.shape,
           (self.num_states, self.num_states))
    yield check_array_equal, raw_transitions, expected_counts
    yield check_array_equal, norm_transitions, expected_freqs
def test_transitions_weights_int_pseudocounts(self):
    """Check weighted transition tables when pseudocounts are given as an integer.

    For each scalar pseudocount in ``(1, 2, 3)``, expected counts are the
    weight-scaled matrices in ``self.mats`` summed together, plus the
    scalar; expected frequencies divide each row of the counts by its row
    sum.

    Yields ``(check_function, found, expected)`` triples for the shape of
    the unnormalized transitions, the unnormalized transitions themselves,
    and the normalized transitions, once per pseudocount value.
    """
    # Kept as an in-place accumulation starting from the integer 0.
    weighted_total = 0
    for matrix, weight in zip(self.mats, self.test_weights):
        weighted_total += weight * matrix

    for pseudocount in (1, 2, 3):
        expected_counts = weighted_total + pseudocount

        # Transposing lets the per-row sums broadcast along the last axis.
        row_totals = expected_counts.sum(1)
        expected_freqs = (1.0 * expected_counts.T / row_totals).T

        # Both calls differ only in ``normalize``.
        shared = dict(weights=self.test_weights,
                      transition_pseudocounts=pseudocount)
        _, raw_transitions = build_hmm_tables(
            self.num_states, self.test_seqs, normalize=False, **shared)
        _, norm_transitions = build_hmm_tables(
            self.num_states, self.test_seqs, normalize=True, **shared)

        yield (check_tuple_equal, raw_transitions.shape,
               (self.num_states, self.num_states))
        yield check_array_equal, raw_transitions, expected_counts
        yield check_array_equal, norm_transitions, expected_freqs
def test_transitions_no_weights_no_pseudocounts(self):
    """Check transition tables built with no weights and zero pseudocounts.

    Expected counts are the plain sum of the matrices in ``self.mats``;
    expected frequencies divide each row of the counts by its row sum.

    Yields ``(check_function, found, expected)`` triples for the shape of
    the unnormalized transitions, the unnormalized transitions themselves,
    and the normalized transitions.
    """
    expected_counts = sum(self.mats)

    # Transposing lets the per-row sums broadcast along the last axis.
    row_totals = expected_counts.sum(1)
    expected_freqs = (1.0 * expected_counts.T / row_totals).T

    # Both calls differ only in ``normalize``.
    shared = dict(weights=None, transition_pseudocounts=0)
    _, raw_transitions = build_hmm_tables(
        self.num_states, self.test_seqs, normalize=False, **shared)
    _, norm_transitions = build_hmm_tables(
        self.num_states, self.test_seqs, normalize=True, **shared)

    yield (check_tuple_equal, raw_transitions.shape,
           (self.num_states, self.num_states))
    yield check_array_equal, raw_transitions, expected_counts
    yield check_array_equal, norm_transitions, expected_freqs
def test_transitions_alternate_initializer(self):
    """Check weighted transition counts built with a sparse initializer.

    ``scipy.sparse.dok_matrix`` is passed as ``initializer``; the returned
    transition table is converted with ``todense()`` and compared against
    the weight-scaled sum of the matrices in ``self.mats``.
    """
    # Kept as an in-place accumulation starting from the integer 0.
    weighted_total = 0
    for matrix, weight in zip(self.mats, self.test_weights):
        weighted_total += weight * matrix

    _, sparse_transitions = build_hmm_tables(
        self.num_states,
        self.test_seqs,
        weights=self.test_weights,
        normalize=False,
        initializer=scipy.sparse.dok_matrix,
    )

    assert_array_equal(sparse_transitions.todense(), weighted_total)
def test_state_priors_no_weights_no_pseudocounts(self):
    """Check state priors built with no weights and zero pseudocounts.

    Expected counts tally how many sequences in ``self.test_seqs`` start in
    each state; expected frequencies are those counts divided by their
    total.

    Zero is passed explicitly for both ``state_prior_pseudocounts`` and
    ``transition_pseudocounts`` so that the "no pseudocounts" condition this
    test is named for does not depend on the defaults of
    ``build_hmm_tables``.

    Yields ``(check_function, found, expected)`` triples for the shape of
    the unnormalized priors, the unnormalized priors themselves, and the
    normalized priors.
    """
    # Each sequence contributes one count to its starting state.
    expected_counts = numpy.zeros(self.num_states)
    for seq in self.test_seqs:
        expected_counts[seq[0]] += 1

    expected_freqs = (1.0 * expected_counts) / expected_counts.sum()

    # Both calls differ only in ``normalize``.
    shared = dict(weights=None,
                  state_prior_pseudocounts=0,
                  transition_pseudocounts=0)
    raw_priors, _ = build_hmm_tables(
        self.num_states, self.test_seqs, normalize=False, **shared)
    norm_priors, _ = build_hmm_tables(
        self.num_states, self.test_seqs, normalize=True, **shared)

    yield check_tuple_equal, raw_priors.shape, (self.num_states, )
    yield check_array_equal, raw_priors, expected_counts
    yield check_array_equal, norm_priors, expected_freqs