def test_extraneous_label(self):
    """A label that is not in the label set must come back as -inf."""
    dog_only = CounterMap()
    dog_only['dog'] = Counter({'warm': 2.0, 'fuzzy': 0.5})

    # Only 'dog' is a known label; 'cat' never appears in the weights.
    known_labels = set(dog_only.iterkeys())
    log_probs = maxent.get_log_probabilities(
        self.features, dog_only, known_labels)

    self.assertEqual(log_probs['cat'], float('-inf'))
def test_zero_weight(self):
    """With 'dog' as the only label, its log-probability must be 0.0."""
    single_label_weights = CounterMap()
    # Only the 'warm' feature is given a weight for this label.
    single_label_weights['dog'] = Counter({'warm': 2.0})

    known_labels = set(single_label_weights.iterkeys())
    log_probs = maxent.get_log_probabilities(
        self.features, single_label_weights, known_labels)

    self.assertEqual(log_probs['dog'], 0.0)
def test_zero_weight(self):
    """A lone label should receive a log-probability of exactly 0.0."""
    warm_only = Counter({'warm': 2.0})

    model = CounterMap()
    model['dog'] = warm_only

    # The label set is derived from the weights, so it holds just 'dog'.
    result = maxent.get_log_probabilities(
        self.features, model, set(model.iterkeys()))

    self.assertEqual(result['dog'], 0.0)
def test_extraneous_label(self):
    """Asking for a label outside the label set should yield -inf."""
    dog_weights = Counter({'warm': 2.0, 'fuzzy': 0.5})

    model = CounterMap()
    model['dog'] = dog_weights

    # The label set is derived from the weights, so 'cat' is not part of it.
    result = maxent.get_log_probabilities(
        self.features, model, set(model.iterkeys()))

    self.assertEqual(result['cat'], float('-inf'))
def setUp(self):
    """Create a dog/cat fixture and precompute its log-probabilities."""
    feature_names = ['warm', 'fuzzy']
    # Every feature fires with a count of 1.0.
    self.features = Counter((name, 1.0) for name in feature_names)

    label_weights = CounterMap()
    label_weights['dog'] = Counter({'warm': 2.0, 'fuzzy': 0.5})
    label_weights['cat'] = Counter({'warm': 0.5, 'fuzzy': 2.0})
    self.weights = label_weights

    self.labels = set(label_weights.iterkeys())
    self.logp = maxent.get_log_probabilities(
        self.features, self.weights, self.labels)
def setUp(self):
    """Prepare features, per-label weights, labels and log-probabilities."""
    self.features = Counter((feature, 1.0) for feature in ['warm', 'fuzzy'])

    # The two labels weight the same pair of features in mirrored fashion.
    dog_weights = Counter({'warm': 2.0, 'fuzzy': 0.5})
    cat_weights = Counter({'warm': 0.5, 'fuzzy': 2.0})

    self.weights = CounterMap()
    self.weights['dog'] = dog_weights
    self.weights['cat'] = cat_weights

    self.labels = set(self.weights.iterkeys())
    self.logp = maxent.get_log_probabilities(
        self.features,
        self.weights,
        self.labels,
    )
def test_fast_slow_equal(self):
    """Slow and fast expected counts must agree for two weight settings."""
    # Each entry: (features weighted 1.0 for 'cat', same for 'bear').
    weight_settings = (
        (('fuzzy', 'claws', 'small', 'medium', 'large'),
         ('fuzzy', 'claws', 'small', 'medium', 'large')),
        # And try again with different weights
        (('fuzzy', 'claws', 'small', 'medium'),
         ('fuzzy', 'claws', 'big')),
    )

    # The same CounterMap is reused; each round overwrites both labels.
    weights = CounterMap()
    for cat_keys, bear_keys in weight_settings:
        weights['cat'] = Counter((key, 1.0) for key in cat_keys)
        weights['bear'] = Counter((key, 1.0) for key in bear_keys)

        log_probs = [
            maxent.get_log_probabilities(datum[1], weights, self.labels)
            for datum in self.labeled_extracted_features
        ]

        slow_expectation = maximumentropy.slow_expected_counts(
            self.labeled_extracted_features, self.labels, log_probs)
        fast_expectation = maxent.get_expected_counts(
            self.labeled_extracted_features, self.labels, log_probs,
            CounterMap())

        self.assertEqual(slow_expectation, fast_expectation)
def test_fast_slow_equal(self):
    """Check that both expected-count implementations return equal results."""
    weights = CounterMap()

    def unit_weights(keys):
        # Give every listed feature a weight of 1.0.
        return Counter((key, 1.0) for key in keys)

    def check_expectations_match():
        # Score every datum with the current weights, then compare the
        # slow and fast expected counts computed from those scores.
        log_probs = []
        for datum in self.labeled_extracted_features:
            log_probs.append(
                maxent.get_log_probabilities(datum[1], weights, self.labels))
        slow = maximumentropy.slow_expected_counts(
            self.labeled_extracted_features, self.labels, log_probs)
        fast = maxent.get_expected_counts(
            self.labeled_extracted_features, self.labels, log_probs,
            CounterMap())
        self.assertEqual(slow, fast)

    weights['cat'] = unit_weights(
        ('fuzzy', 'claws', 'small', 'medium', 'large'))
    weights['bear'] = unit_weights(
        ('fuzzy', 'claws', 'small', 'medium', 'large'))
    check_expectations_match()

    # And try again with different weights
    weights['cat'] = unit_weights(('fuzzy', 'claws', 'small', 'medium'))
    weights['bear'] = unit_weights(('fuzzy', 'claws', 'big'))
    check_expectations_match()
def test_performance(self):
    """
    C API should be faster than the Python API.

    (This is potentially flaky, depending on system load patterns.)
    """
    iterations = 100000

    # Time the slow implementation; the results themselves are discarded.
    start = time.time()
    for _ in xrange(iterations):
        maximumentropy.slow_log_probs(self.features, self.weights, self.labels)
    slow_time = time.time() - start

    # Time the fast implementation on the same inputs.
    start = time.time()
    for _ in xrange(iterations):
        maxent.get_log_probabilities(self.features, self.weights, self.labels)
    fast_time = time.time() - start

    # Report both timings on failure instead of a bare "False is not true".
    self.assertTrue(
        fast_time < slow_time,
        'fast path took %.3fs, slow path took %.3fs' % (fast_time, slow_time))
def test_uneven_weights(self):
    """Log-probabilities must match hand-built scores for uneven weights."""
    model = CounterMap()
    model['dog'] = Counter({'warm': 2.0, 'fuzzy': 1.0})
    model['cat'] = Counter({'warm': 1.0, 'fuzzy': 1.0})

    known_labels = set(model.iterkeys())
    log_probs = maxent.get_log_probabilities(
        self.features, model, known_labels)

    # construct scores: one weight * feature-value term per feature
    expected = Counter()
    expected['dog'] = 2.0 * 1.0 + 1.0 * 1.0
    expected['cat'] = 1.0 * 1.0 + 1.0 * 1.0
    expected.log_normalize()

    # check scores explicitly
    self.assertAlmostEqual(expected['dog'], log(0.731), 3)
    self.assertAlmostEqual(expected['cat'], log(0.269), 3)

    # check that log probs is correct
    for label in ('dog', 'cat'):
        self.assertEqual(log_probs[label], expected[label])
def test_uneven_weights(self):
    """Compare computed log-probabilities against manually normalized scores."""
    dog_weights = Counter({'warm': 2.0, 'fuzzy': 1.0})
    cat_weights = Counter({'warm': 1.0, 'fuzzy': 1.0})

    weights = CounterMap()
    weights['dog'] = dog_weights
    weights['cat'] = cat_weights

    actual = maxent.get_log_probabilities(
        self.features, weights, set(weights.iterkeys()))

    # construct scores by hand, then normalize them in log space
    reference = Counter()
    reference['dog'] = 2.0 * 1.0 + 1.0 * 1.0
    reference['cat'] = 1.0 * 1.0 + 1.0 * 1.0
    reference.log_normalize()

    # check scores explicitly (to three decimal places)
    self.assertAlmostEqual(reference['dog'], log(0.731), 3)
    self.assertAlmostEqual(reference['cat'], log(0.269), 3)

    # check that log probs is correct
    self.assertEqual(actual['dog'], reference['dog'])
    self.assertEqual(actual['cat'], reference['cat'])
from itertools import izip, repeat, chain

from maxent import get_log_probabilities, get_expected_counts
from countermap import CounterMap
from counter import Counter


def cnter(l):
    """Return a Counter that pairs every item of the sequence *l* with 1.0."""
    return Counter(izip(l, repeat(1.0, len(l))))


# (label, feature counter) pairs used as toy training data.
training_data = (('cat', cnter(('fuzzy', 'claws', 'small'))),
                 ('bear', cnter(('fuzzy', 'claws', 'big'))),
                 ('cat', cnter(('claws', 'medium'))))

labels = set(label for label, _ in training_data)

# Union of every feature name seen in the training data.
features = set()
for _, counter in training_data:
    features.update(counter.keys())

weights = CounterMap()

# Use a distinct name for the per-datum counter so the module-level
# `features` set built above is not overwritten by the loop.
log_probs = list()
for _, datum_features in training_data:
    log_probs.append(get_log_probabilities(datum_features, weights, labels))

test = get_expected_counts(training_data, labels, log_probs, CounterMap())
# Single-argument print(...) prints the same under the Python 2 print statement.
print(test)
from itertools import izip, repeat, chain

from maxent import get_log_probabilities, get_expected_counts
from countermap import CounterMap
from counter import Counter


def cnter(l):
    """Build a Counter in which each element of the sequence *l* counts 1.0."""
    return Counter(izip(l, repeat(1.0, len(l))))


# Toy training set: each entry is a (label, feature counter) pair.
training_data = (
    ('cat', cnter(('fuzzy', 'claws', 'small'))),
    ('bear', cnter(('fuzzy', 'claws', 'big'))),
    ('cat', cnter(('claws', 'medium'))),
)

labels = set(label for label, _ in training_data)

# Collect all feature names that occur in any training datum.
features = set()
for _, counter in training_data:
    features.update(counter.keys())

weights = CounterMap()

# The per-datum counter gets its own name; reusing `features` here would
# replace the feature-name set collected above with the last datum's counter.
log_probs = [get_log_probabilities(datum_features, weights, labels)
             for _, datum_features in training_data]

test = get_expected_counts(training_data, labels, log_probs, CounterMap())
# Parenthesized single-argument form prints the same as the Python 2 statement.
print(test)