def test_kmeans_a():
    """Test Part A of the ClusterGroup class.

    Builds a four-point, two-dimensional dataset and checks that:

    * a ClusterGroup created without seed indices has the requested number
      of clusters, every centroid is one of the dataset items, and no two
      clusters share a centroid;
    * a ClusterGroup created with explicit seed indices uses exactly those
      items, in order, as its initial centroids.
    """
    print(' Testing Part A of class ClusterGroup')

    # A dataset with four points almost in a square
    items = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6.Dataset(2, items)

    # Test creating a clustering with random seeds
    km = a6.ClusterGroup(dset, 3)
    clusters = km.getClusters()

    # Should have 3 clusters.  The expected value is passed first, matching
    # the (expected, received) order of the other assert_equals calls below.
    cornelltest.assert_equals(3, len(clusters))

    for clust in clusters:
        # cluster centroids should have been chosen from items
        cornelltest.assert_true(clust.getCentroid() in items)
        # cluster centroids should be distinct (since items are)
        for clust2 in clusters:
            if clust2 is not clust:
                cornelltest.assert_float_lists_not_equal(clust.getCentroid(),
                                                         clust2.getCentroid())
    print(' Random ClusterGroup initialization looks okay')

    # Clusterings of that dataset, with two and three deterministic clusters:
    # the i-th cluster must start at the item named by the i-th seed index.
    for seeds in ([0, 2], [0, 2, 3]):
        km = a6.ClusterGroup(dset, len(seeds), seeds)
        for position, seed in enumerate(seeds):
            cornelltest.assert_equals(items[seed],
                                      km.getClusters()[position].getCentroid())
    print(' Seeded ClusterGroup initialization looks okay')

    print(' Part A of class ClusterGroup appears correct')
    print('')
def testE():
    """Test Part E (of Part I) of the assignment.

    Exercises the last piece of K-means: the cluster update() method and the
    database step() method.  As in the Part D test, random.seed is called
    first so the random number generator yields a repeatable sequence.
    """
    print(' Testing Part E')

    # Pin the random number generator (3 is more interesting than 1)
    random.seed(3)

    # Shared fixture for every test case: a non-empty database with K = 2
    items = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
             [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dbase = Database(3, items)
    dbase.setKSize(2)

    # PRE-TEST, first cluster (should be okay if passed part D)
    first = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6], first.getCentroid())
    cornelltest.assert_float_lists_equal(items[1:3], first.getContents())

    # PRE-TEST, second cluster (should be okay if passed part D)
    second = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5], second.getCentroid())
    cornelltest.assert_float_lists_equal([items[0]] + items[3:],
                                         second.getContents())

    # Stand-alone copy of the first cluster, used to test update()
    scratch = Cluster(first.getCentroid())
    for point in first.getContents():
        scratch.appendContents(point)

    # TEST CASES 1 and 2 (update): the first call moves the centroid and
    # reports "not stable"; the second leaves it in place and reports "stable".
    for verdict in (cornelltest.assert_false, cornelltest.assert_true):
        stable = scratch.update()
        cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                             scratch.getCentroid())
        verdict(stable)
    print(' Method update() looks okay')

    # TEST CASE 3 (step)
    dbase.step()

    # K size should be unchanged
    cornelltest.assert_equals(2, dbase.getKSize())

    # First cluster (WHICH HAS CHANGED!)
    first = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6], first.getCentroid())
    cornelltest.assert_float_lists_equal(items[1:4], first.getContents())

    # Second cluster (WHICH HAS CHANGED!)
    second = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.475, 0.475],
                                         second.getCentroid())
    cornelltest.assert_float_lists_equal([items[0]] + items[4:],
                                         second.getContents())
    print(' Method step() looks okay')

    print(' Part E appears correct')
def test_match():
    """Test search function match.

    Each case passes a pattern containing '?' and a hand-built prefix map to
    a4.match, then compares the returned words with the expected list using
    assert_lists_equal (defined outside this block) and additionally checks
    that every expected word is present in the result.
    """
    print('Testing function match')

    # The original membership checks were written as
    #     'in' and 'it' in words
    # which Python evaluates as  'in' and ('it' in words):  only the LAST
    # word was ever tested.  all(...) tests every expected word.

    expected = ['in', 'it']
    pmap = {'': ['i'], 'i': ['t', 'n'], 'in': [''], 'it': ['']}
    words = a4.match('i?', pmap)
    assert_lists_equal(expected, words)
    cornelltest.assert_true(all(word in words for word in expected))

    expected = ['ins', 'its']
    pmap = {'': ['i'], 'i': ['t', 'n'], 'in': ['', 's'], 'it': ['', 's'],
            'ins': [''], 'its': ['']}
    words = a4.match('i?s', pmap)
    assert_lists_equal(expected, words)
    cornelltest.assert_true(all(word in words for word in expected))

    expected = ['it', 'at']
    pmap = {'': ['i', 'a'], 'i': ['t', 'n'], 'a': ['t'], 'at': [''],
            'in': [''], 'it': ['']}
    words = a4.match('?t', pmap)
    assert_lists_equal(expected, words)
    cornelltest.assert_true(all(word in words for word in expected))

    # No key in the map starts with 'p', so nothing should match
    expected = []
    pmap = {'': ['i', 'a'], 'a': ['t'], 'at': [''], 'i': ['t', 'n'],
            'in': [''], 'it': ['']}
    words = a4.match('p?', pmap)
    assert_lists_equal(expected, words)

    expected = ['ins', 'its', 'int']
    pmap = {'': ['i'], 'i': ['t', 'n'], 'in': ['', 's', 't'], 'int': [''],
            'it': ['', 's'], 'ins': [''], 'its': ['']}
    words = a4.match('i??', pmap)
    assert_lists_equal(expected, words)
    cornelltest.assert_true(all(word in words for word in expected))

    expected = ['in', 'it', 'at']
    pmap = {'': ['i', 'a'], 'a': ['t'], 'at': [''], 'i': ['t', 'n'],
            'in': ['', 's'], 'it': ['', 's'], 'ins': [''], 'its': ['']}
    words = a4.match('??', pmap)
    assert_lists_equal(expected, words)
    cornelltest.assert_true(all(word in words for word in expected))
def test_cluster_b():
    """Test Part B of the Cluster class assignment.

    Covers Cluster.distance() and Cluster.updateCentroid() on a small
    three-dimensional dataset.
    """
    print(' Testing Part B of class Cluster')

    # Four points in three dimensions
    points = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
    dset = a6.Dataset(3, points)

    # Two clusters over that dataset, each with its own starting centroid
    first = a6.Cluster(dset, [0.5, 0.5, 0.0])
    second = a6.Cluster(dset, [0.0, 0.0, 0.5])

    # TEST CASES 1-3 (distance): (cluster, query point, expected distance)
    distance_cases = (
        (first, [1.0, 0.0, -1.0], 1.22474487139),
        (first, [0.5, 0.5, 0.0], 0.0),
        (second, [0.5, 0.0, 0.5], 0.5),
    )
    for cluster, query, expected in distance_cases:
        dist = cluster.distance(query)
        cornelltest.assert_floats_equal(expected, dist)
    print(' Method Cluster.distance() looks okay')

    # TEST CASE 1 (updateCentroid): centroid remains the same
    for index in (0, 1):
        first.addIndex(index)
    stable = first.updateCentroid()
    cornelltest.assert_float_lists_equal([0.5, 0.5, 0.0], first.getCentroid())
    cornelltest.assert_true(stable)

    # TEST CASE 2 (updateCentroid): centroid changes
    for index in (2, 3):
        first.addIndex(index)
    stable = first.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25], first.getCentroid())
    cornelltest.assert_false(stable)

    # A further update with the same points must report a stable centroid
    stable = first.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25], first.getCentroid())
    cornelltest.assert_true(stable)
    print(' Method Cluster.updateCentroid() looks okay')

    print(' Part B of class Cluster appears correct')
    print('')
def test_pmap_has_word():
    """Test function pmap_has_word.

    Checks an empty prefix map first, then a hand-built map holding the
    words 'a' and 'at'.
    """
    print('Testing function pmap_has_word')

    # An empty prefix map contains no words at all
    cornelltest.assert_false(a4.pmap_has_word({}, 'a'))

    # Prefix map assembled by hand
    pmap = {'': ['a'], 'a': ['', 't'], 'at': ['']}

    # '' is a key but NOT A WORD; 'by' is simply absent
    for absent in ('', 'by'):
        cornelltest.assert_false(a4.pmap_has_word(pmap, absent))

    for present in ('a', 'at'):
        cornelltest.assert_true(a4.pmap_has_word(pmap, present))
def test_pmap_has_word():
    """Test function pmap_has_word.

    Checks an empty prefix map first, then a hand-built map holding the
    words 'a' and 'at'.
    """
    print('Testing function pmap_has_word')

    # An empty prefix map contains no words at all
    cornelltest.assert_false(a4.pmap_has_word({}, 'a'))

    # Prefix map assembled by hand
    pmap = {'': ['a'], 'a': ['', 't'], 'at': ['']}

    # '' is a key but NOT A WORD; 'by' is simply absent
    for absent in ('', 'by'):
        cornelltest.assert_false(a4.pmap_has_word(pmap, absent))

    for present in ('a', 'at'):
        cornelltest.assert_true(a4.pmap_has_word(pmap, present))
def test_cluster_b():
    """Test Part B of the Cluster class assignment.

    Covers Cluster.distance() and Cluster.updateCentroid() on a small
    three-dimensional dataset.
    """
    print(' Testing Part B of class Cluster')

    # Four points in three dimensions
    points = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
    dset = a6.Dataset(3, points)

    # Two clusters over that dataset, each with its own starting centroid
    first = a6.Cluster(dset, [0.5, 0.5, 0.0])
    second = a6.Cluster(dset, [0.0, 0.0, 0.5])

    # TEST CASES 1-3 (distance): (cluster, query point, expected distance)
    distance_cases = (
        (first, [1.0, 0.0, -1.0], 1.22474487139),
        (first, [0.5, 0.5, 0.0], 0.0),
        (second, [0.5, 0.0, 0.5], 0.5),
    )
    for cluster, query, expected in distance_cases:
        dist = cluster.distance(query)
        cornelltest.assert_floats_equal(expected, dist)
    print(' Method Cluster.distance() looks okay')

    # TEST CASE 1 (updateCentroid): centroid remains the same
    for index in (0, 1):
        first.addIndex(index)
    stable = first.updateCentroid()
    cornelltest.assert_float_lists_equal([0.5, 0.5, 0.0], first.getCentroid())
    cornelltest.assert_true(stable)

    # TEST CASE 2 (updateCentroid): centroid changes
    for index in (2, 3):
        first.addIndex(index)
    stable = first.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25], first.getCentroid())
    cornelltest.assert_false(stable)

    # A further update with the same points must report a stable centroid
    stable = first.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25], first.getCentroid())
    cornelltest.assert_true(stable)
    print(' Method Cluster.updateCentroid() looks okay')

    print(' Part B of class Cluster appears correct')
    print('')
def test_scrabble():
    """Test search function scrabble.

    Each case passes a string of letters, a word length and a hand-built
    prefix map to a4.scrabble, then compares the returned words with the
    expected list using assert_lists_equal (defined outside this block).
    Non-empty expectations are additionally checked word by word.
    """
    print('Testing function scrabble')

    expected = ['the']
    pmap = {'': ['t'], 't': ['h'], 'th': ['a', 'e'], 'the': [''],
            'tha': ['t'], 'that': ['']}
    words = a4.scrabble('athet', 3, pmap)
    assert_lists_equal(expected, words)
    cornelltest.assert_true(all(word in words for word in expected))

    # Requested length 7: no word in the map is that long
    expected = []
    pmap = {'': ['t'], 't': ['h'], 'th': ['a', 'e'], 'the': [''],
            'tha': ['t'], 'that': ['']}
    words = a4.scrabble('athet', 7, pmap)
    assert_lists_equal(expected, words)

    # No letters supplied at all
    expected = []
    pmap = {'': ['t'], 't': ['h'], 'th': ['a', 'e'], 'the': [''],
            'tha': ['t'], 'that': ['']}
    words = a4.scrabble('', 3, pmap)
    assert_lists_equal(expected, words)

    expected = ['the', 'ate']
    pmap = {'': ['t', 'a'], 't': ['h'], 'a': ['t'], 'at': ['e'], 'ate': [''],
            'th': ['a', 'e'], 'the': [''], 'tha': ['t'], 'that': ['']}
    words = a4.scrabble('athet', 3, pmap)
    assert_lists_equal(expected, words)
    # The original check was  'the' and 'ate' in words,  which Python reads
    # as  'the' and ('ate' in words)  and therefore never tested 'the'.
    cornelltest.assert_true(all(word in words for word in expected))
def test_kmeans_b():
    """Test Part B of the ClusterGroup class.

    Exercises the private methods _nearest_cluster and _partition.  Calling
    private methods from outside their class is normally poor form, but
    test code is an accepted exception: it often has to be tied more
    closely to a class's implementation than ordinary client code.
    """
    print(' Testing Part B of class ClusterGroup')

    # Reinitialize data set
    items = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6.Dataset(2, items)
    two_way = a6.ClusterGroup(dset, 2, [0, 2])
    three_way = a6.ClusterGroup(dset, 3, [0, 2, 3])

    # (clustering, query point, position of the cluster expected to be nearest)
    nearest_cases = (
        (two_way, [1., 1.], 0),
        (two_way, [1., 10.], 1),
        (three_way, [1., 1.], 0),
        (three_way, [1., 10.], 2),
    )
    for group, query, position in nearest_cases:
        nearest = group._nearest_cluster(query)
        cornelltest.assert_true(nearest is group.getClusters()[position])
    print(' Method ClusterGroup._nearest_cluster() looks okay')

    # Testing partition()
    # Points 0 and 3 are closer to each other, as are 1 and 2.  The second
    # pass repeats the partition and should not change the clusters.
    for _ in range(2):
        two_way._partition()
        cornelltest.assert_equals(set([0, 3]),
                                  set(two_way.getClusters()[0].getIndices()))
        cornelltest.assert_equals(set([1, 2]),
                                  set(two_way.getClusters()[1].getIndices()))

    # Reset the cluster centroids; now it changes
    members = two_way.getClusters()
    members[0]._centroid = [5.0, 10.0]
    members[1]._centroid = [0.0, 2.0]
    two_way._partition()
    cornelltest.assert_equals(set([2, 3]),
                              set(two_way.getClusters()[0].getIndices()))
    cornelltest.assert_equals(set([0, 1]),
                              set(two_way.getClusters()[1].getIndices()))
    print(' Method ClusterGroup._partition() looks okay')

    print(' Part B of class ClusterGroup appears correct')
    print('')
def test_autocomplete():
    """Test search function autocomplete.

    The first case uses a hand-built prefix map; the remaining cases build
    the map from the word list in 'short.txt' via a4.build_word_list and
    a4.word_list_to_pmap.  Results are compared with assert_lists_equal
    (defined outside this block), and non-empty expectations are also
    checked word by word.
    """
    print('Testing function autocomplete')

    # The original membership checks chained string literals with `and`
    # (e.g.  'the' and 'that' in words),  which only tests the LAST literal.
    # They are replaced by an explicit check of every expected word.

    expected = ['the', 'that']
    pmap = {'th': ['a', 'e'], 'the': [''], 'tha': ['t'], 'that': ['']}
    words = a4.autocomplete('th', pmap)
    assert_lists_equal(expected, words)
    cornelltest.assert_true(all(word in words for word in expected))

    # A prefix that should have no completions
    expected = []
    pmap = a4.word_list_to_pmap(a4.build_word_list('short.txt'))
    words = a4.autocomplete('z', pmap)
    assert_lists_equal(expected, words)

    # The empty prefix should complete to every word in the file
    expected = a4.build_word_list('short.txt')
    pmap = a4.word_list_to_pmap(a4.build_word_list('short.txt'))
    words = a4.autocomplete('', pmap)
    assert_lists_equal(expected, words)
    # Checked against `expected` rather than the original hand-written list:
    # that list named 'two', yet the 't' case below expects only 'to',
    # 'that' and 'the', so a literal fix would contradict it.
    cornelltest.assert_true(all(word in words for word in expected))

    expected = ['to', 'that', 'the']
    pmap = a4.word_list_to_pmap(a4.build_word_list('short.txt'))
    words = a4.autocomplete('t', pmap)
    assert_lists_equal(expected, words)
    cornelltest.assert_true(all(word in words for word in expected))
def test_kmeans_b():
    """Test Part B of the ClusterGroup class.

    Exercises the private methods _nearest_cluster and _partition.  Calling
    private methods from outside their class is normally poor form, but
    test code is an accepted exception: it often has to be tied more
    closely to a class's implementation than ordinary client code.
    """
    print(' Testing Part B of class ClusterGroup')

    # Reinitialize data set
    items = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6.Dataset(2, items)
    two_way = a6.ClusterGroup(dset, 2, [0, 2])
    three_way = a6.ClusterGroup(dset, 3, [0, 2, 3])

    # (clustering, query point, position of the cluster expected to be nearest)
    nearest_cases = (
        (two_way, [1., 1.], 0),
        (two_way, [1., 10.], 1),
        (three_way, [1., 1.], 0),
        (three_way, [1., 10.], 2),
    )
    for group, query, position in nearest_cases:
        nearest = group._nearest_cluster(query)
        cornelltest.assert_true(nearest is group.getClusters()[position])
    print(' Method ClusterGroup._nearest_cluster() looks okay')

    # Testing partition()
    # Points 0 and 3 are closer to each other, as are 1 and 2.  The second
    # pass repeats the partition and should not change the clusters.
    for _ in range(2):
        two_way._partition()
        cornelltest.assert_equals(set([0, 3]),
                                  set(two_way.getClusters()[0].getIndices()))
        cornelltest.assert_equals(set([1, 2]),
                                  set(two_way.getClusters()[1].getIndices()))

    # Reset the cluster centroids; now it changes
    members = two_way.getClusters()
    members[0]._centroid = [5.0, 10.0]
    members[1]._centroid = [0.0, 2.0]
    two_way._partition()
    cornelltest.assert_equals(set([2, 3]),
                              set(two_way.getClusters()[0].getIndices()))
    cornelltest.assert_equals(set([0, 1]),
                              set(two_way.getClusters()[1].getIndices()))
    print(' Method ClusterGroup._partition() looks okay')

    print(' Part B of class ClusterGroup appears correct')
    print('')
def testD():
    """Test a1.iscurrency and a1.exchange against fixed expected values."""
    # (currency code, expected result of iscurrency)
    currency_cases = (
        ('AED', True),
        ('LOL', False),
        ('MOM', False),
        ('USD', True),
    )
    for code, validity in currency_cases:
        result = a1.iscurrency(code)
        cornelltest.assert_true(result == validity)

    # Test exchange(currency_from, currency_to, amount_from)
    exchange_cases = (
        (('USD', 'EUR', 2.5), 2.24075),
        (('CAD', 'CNY', 1.0), 5.1369278716282),
        (('CAD', 'CNY', 1.09), 5.5992513800748),
        (('CAD', 'CNY', 1.09999), 5.6505692895124),
    )
    for arguments, expected in exchange_cases:
        result = a1.exchange(*arguments)
        cornelltest.assert_floats_equal(expected, result)
def test_pmap_add_word():
    """Test function pmap_add_word.

    Adds 'a', 'by', 'at' and 'that' to an initially empty prefix map, one
    at a time, and checks the map's size and entries after each addition.
    """
    print('Testing pmap_add_word')

    def verify(entries):
        # For each (key, expected list) pair, in order: the key must be
        # present and its value must match the expected list.
        for key, expected in entries:
            cornelltest.assert_true(key in pmap)
            assert_lists_equal(expected, pmap[key])

    # Start with an empty prefix map
    pmap = {}
    a4.pmap_add_word(pmap, 'a')

    # The map should now hold two keys: the empty string and 'a'
    cornelltest.assert_equals(2, len(pmap))
    verify((('', ['a']), ('a', [''])))

    # Add something with two letters
    a4.pmap_add_word(pmap, 'by')

    # Verify the keys again
    cornelltest.assert_equals(4, len(pmap))
    verify((('', ['a', 'b']), ('a', ['']), ('b', ['y']), ('by', [''])))

    # One last time with overlap
    a4.pmap_add_word(pmap, 'at')

    # Verify the keys again
    cornelltest.assert_equals(5, len(pmap))
    verify((('', ['a', 'b']), ('a', ['', 't']), ('at', ['']),
            ('b', ['y']), ('by', [''])))

    # A four-letter word that introduces a new first letter
    a4.pmap_add_word(pmap, 'that')
    cornelltest.assert_true('that' in pmap)
    cornelltest.assert_true('' in pmap['that'])
    assert_lists_equal(['h'], pmap['t'])
    verify((('th', ['a']), ('tha', ['t'])))
    assert_lists_equal([''], pmap['that'])

    # The earlier entries must still be intact
    cornelltest.assert_equals(9, len(pmap))
    verify((('', ['a', 'b', 't']), ('a', ['', 't']), ('at', ['']),
            ('b', ['y']), ('by', [''])))
def test_word_list_to_pmap():
    """Test function word_list_to_pmap.

    Converts an empty list, a one-word list and a three-word list, checking
    the size and entries of each resulting prefix map.
    """
    print('Testing function word_list_to_pmap')

    # An empty word list should give an empty dictionary
    pmap = a4.word_list_to_pmap([])
    cornelltest.assert_equals(dict, type(pmap))
    cornelltest.assert_equals(0, len(pmap))

    # One word, two letters (same checking style as for pmap_add_word)
    pmap = a4.word_list_to_pmap(['at'])
    cornelltest.assert_equals(3, len(pmap))
    assert_lists_equal(['a'], pmap[''])
    for key, expected in (('a', ['t']), ('at', [''])):
        cornelltest.assert_true(key in pmap)
        assert_lists_equal(expected, pmap[key])

    # Several words
    pmap = a4.word_list_to_pmap(['at', 'by', 'a'])
    cornelltest.assert_equals(5, len(pmap))
    several = (
        ('', ['a', 'b']),
        ('a', ['', 't']),
        ('at', ['']),
        ('b', ['y']),
        ('by', ['']),
    )
    for key, expected in several:
        cornelltest.assert_true(key in pmap)
        assert_lists_equal(expected, pmap[key])
def test_kmeans_c():
    """Test Part C of the ClusterGroup class.

    Covers ClusterGroup._update(), Cluster.updateCentroid() and
    ClusterGroup.step(), first on small hand-made datasets and then on a
    clustering built from 'datasets/smallcandy.csv'.
    """
    print(' Testing Part C of class ClusterGroup')

    def check_group(group, expectations):
        # For each cluster position, in order, compare the centroid and the
        # set of member indices with the expected (centroid, indices) pair.
        for position, (centroid, indices) in enumerate(expectations):
            member = group.getClusters()[position]
            cornelltest.assert_float_lists_equal(centroid, member.getCentroid())
            cornelltest.assert_equals(set(indices), set(member.getIndices()))

    items = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6.Dataset(2, items)
    km1 = a6.ClusterGroup(dset, 2, [0, 2])
    km1._partition()

    # Test update(): the first call moves the centroids and is not stable;
    # updating again should not change anything, but should return stable.
    for verdict in (cornelltest.assert_false, cornelltest.assert_true):
        stable = km1._update()
        cornelltest.assert_float_lists_equal([0, 4.5],
                                             km1.getClusters()[0].getCentroid())
        cornelltest.assert_float_lists_equal([10.0, 5.5],
                                             km1.getClusters()[1].getCentroid())
        verdict(stable)
    print(' Method ClusterGroup._update() looks okay')

    # Now test the k-means process itself.

    # FOR ALL TEST CASES
    # Create and initialize a non-empty dataset
    items = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
             [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dset = a6.Dataset(3, items)

    # Non-random seed indices keep the test deterministic
    km2 = a6.ClusterGroup(dset, 2, [1, 3])

    # PRE-TEST: both clusters start at their seed point with no members
    # (should be okay if passed part D)
    check_group(km2, (([0.5, 0.6, 0.6], []),
                      ([0.5, 0.6, 0.5], [])))

    # Make a fake cluster to test the updateCentroid() method
    scratch = a6.Cluster(dset, [0.5, 0.6, 0.6])
    for ind in [1, 2]:
        scratch.addIndex(ind)

    # TEST CASES 1 and 2 (update): not yet stable, then stable
    for verdict in (cornelltest.assert_false, cornelltest.assert_true):
        stable = scratch.updateCentroid()
        cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                             scratch.getCentroid())
        verdict(stable)

    # TEST CASE 3 (step): both clusters HAVE CHANGED
    km2.step()
    check_group(km2, (([0.55, 0.55, 0.6], [1, 2]),
                      ([0.5, 0.475, 0.475], [0, 3, 4, 5])))

    # TEST CASE 4 (second step): both clusters HAVE CHANGED again
    km2.step()
    check_group(km2, (([8. / 15, 17. / 30, 17. / 30], [1, 2, 3]),
                      ([0.5, 13. / 30, 14. / 30], [0, 4, 5])))

    # Try it on a file
    km3 = candy_to_kmeans('datasets/smallcandy.csv', 3, [23, 54, 36])
    km3.step()

    # The actual results
    found = [km3.getClusters()[position] for position in range(3)]

    # The "correct" answers
    contents0 = [
        [0.88, 0.84, 0.8, 0.3], [0.02, 0.67, 0.75, 0.61], [0.2, 0.54, 0.73, 0.85],
        [0.62, 0.75, 0.65, 0.43], [0.35, 0.63, 0.65, 0.12], [0.61, 0.85, 0.81, 0.44],
        [0.95, 0.94, 0.98, 0.69], [0.04, 0.69, 0.38, 0.39], [0.04, 0.52, 0.99, 0.75],
        [0.28, 0.91, 0.63, 0.08], [0.14, 0.55, 0.67, 0.63], [0.38, 0.94, 0.53, 0.07],
        [0.08, 0.62, 0.32, 0.27], [0.69, 0.82, 0.75, 0.65], [0.84, 0.89, 0.91, 0.38],
        [0.22, 0.88, 0.39, 0.33], [0.39, 0.38, 0.85, 0.32], [0.26, 0.39, 0.95, 0.63],
        [0.15, 0.87, 0.62, 0.22], [0.65, 0.81, 0.69, 0.55], [0.27, 0.63, 0.69, 0.39],
        [0.35, 0.7, 0.41, 0.15], [0.2, 0.48, 0.98, 0.84], [0.76, 0.86, 0.74, 0.61],
        [0.27, 0.65, 0.52, 0.28], [0.86, 0.91, 0.88, 0.62], [0.1, 0.79, 0.5, 0.12],
        [0.09, 0.85, 0.55, 0.21], [0.79, 0.94, 0.83, 0.48], [0.73, 0.92, 0.74, 0.39],
        [0.31, 0.5, 0.87, 0.85], [0.39, 0.9, 0.52, 0.26], [0.46, 0.35, 0.96, 0.05],
        [0.21, 0.62, 0.33, 0.09], [0.58, 0.37, 0.9, 0.08], [0.54, 0.92, 0.36, 0.35],
        [0.36, 0.64, 0.57, 0.26], [0.09, 0.47, 0.63, 0.8], [0.4, 0.69, 0.74, 0.7],
    ]
    contents1 = [
        [0.32, 0.87, 0.14, 0.68], [0.87, 0.99, 0.2, 0.8], [0.86, 0.86, 0.32, 0.88],
        [0.81, 0.66, 0.26, 0.82], [0.91, 0.98, 0.61, 0.58], [0.84, 0.88, 0.04, 0.86],
        [0.8, 0.62, 0.09, 0.65], [0.72, 0.88, 0.02, 0.95], [0.88, 0.96, 0.09, 0.88],
    ]
    contents2 = [
        [0.4, 0.21, 0.78, 0.68], [0.54, 0.06, 0.81, 0.98], [0.73, 0.31, 0.15, 0.08],
        [0.81, 0.69, 0.65, 0.65], [0.14, 0.31, 0.86, 0.74], [0.77, 0.45, 0.31, 0.31],
        [0.39, 0.14, 0.99, 0.24], [0.23, 0.32, 0.7, 0.75], [0.65, 0.05, 0.39, 0.49],
        [0.96, 0.09, 0.49, 0.3], [0.86, 0.03, 0.3, 0.39], [0.5, 0.2, 0.69, 0.95],
        [0.79, 0.09, 0.41, 0.69], [0.4, 0.3, 0.78, 0.74], [0.65, 0.24, 0.63, 0.27],
        [0.35, 0.3, 0.94, 0.92], [0.71, 0.78, 0.64, 0.57], [0.8, 0.4, 0.23, 0.33],
        [0.38, 0.07, 0.82, 0.01], [0.66, 0.09, 0.69, 0.46], [0.54, 0.06, 0.74, 0.86],
        [0.95, 0.62, 0.28, 0.01], [0.35, 0.71, 0.01, 0.32], [0.62, 0.24, 0.77, 0.17],
        [0.73, 0.65, 0.23, 0.02], [0.27, 0.38, 0.76, 0.63], [0.9, 0.63, 0.83, 0.6],
        [0.7, 0.04, 0.7, 0.82], [0.95, 0.83, 0.64, 0.5], [0.41, 0.11, 0.61, 0.78],
        [0.22, 0.44, 0.67, 0.99], [0.51, 0.05, 0.95, 0.66], [0.99, 0.68, 0.8, 0.42],
        [0.72, 0.55, 0.1, 0.17], [0.44, 0.1, 0.61, 0.98], [0.31, 0.16, 0.95, 0.9],
        [0.61, 0.42, 0.24, 0.33], [0.89, 0.72, 0.78, 0.38], [0.5, 0.09, 0.84, 0.78],
        [0.62, 0.01, 0.88, 0.1], [0.44, 0.28, 0.88, 0.99], [0.57, 0.23, 0.6, 0.85],
        [0.9, 0.05, 0.34, 0.41], [0.9, 0.41, 0.27, 0.36], [0.67, 0.32, 0.66, 0.2],
        [0.72, 0.14, 0.63, 0.37], [0.39, 0.08, 0.77, 0.96], [0.9, 0.7, 0.74, 0.63],
        [0.63, 0.05, 0.52, 0.63], [0.62, 0.27, 0.67, 0.77], [0.35, 0.04, 0.85, 0.86],
        [0.36, 0.34, 0.75, 0.37],
    ]
    centroids = [
        [0.3987179487179487, 0.7097435897435899,
         0.6864102564102561, 0.4164102564102565],
        [0.7788888888888889, 0.8555555555555555,
         0.19666666666666668, 0.788888888888889],
        [0.6038461538461538, 0.29865384615384616,
         0.6217307692307692, 0.5455769230769231],
    ]

    # All three centroids are compared first, then all three contents lists
    for expected, cluster in zip(centroids, found):
        cornelltest.assert_float_lists_equal(expected, cluster.getCentroid())
    for expected, cluster in zip([contents0, contents1, contents2], found):
        cornelltest.assert_float_lists_equal(expected, cluster.getContents())
    print(' Method ClusterGroup.step looks okay')

    print(' Part C of class ClusterGroup appears correct')
    print('')
def test_kmeans_b():
    """Test Part B of the ClusterGroup class.

    Exercises the private methods _nearest_cluster and _partition, on a
    small hand-made dataset and on a clustering built from
    'datasets/smallcandy.csv'.  Calling private methods from outside their
    class is normally poor form, but test code is an accepted exception: it
    often has to be tied more closely to a class's implementation than
    ordinary client code.
    """
    print(' Testing Part B of class ClusterGroup')

    # Reinitialize data set
    items = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6.Dataset(2, items)
    two_way = a6.ClusterGroup(dset, 2, [0, 2])
    three_way = a6.ClusterGroup(dset, 3, [0, 2, 3])

    # (clustering, query point, position of the cluster expected to be nearest)
    nearest_cases = (
        (two_way, [1., 1.], 0),
        (two_way, [1., 10.], 1),
        (three_way, [1., 1.], 0),
        (three_way, [1., 10.], 2),
    )
    for group, query, position in nearest_cases:
        nearest = group._nearest_cluster(query)
        cornelltest.assert_true(nearest is group.getClusters()[position])
    print(' Method ClusterGroup._nearest_cluster() looks okay')

    # Testing partition()
    # Points 0 and 3 are closer to each other, as are 1 and 2.  The second
    # pass repeats the partition and should not change the clusters.
    for _ in range(2):
        two_way._partition()
        cornelltest.assert_equals(set([0, 3]),
                                  set(two_way.getClusters()[0].getIndices()))
        cornelltest.assert_equals(set([1, 2]),
                                  set(two_way.getClusters()[1].getIndices()))

    # Reset the cluster centroids; now it changes
    members = two_way.getClusters()
    members[0]._centroid = [5.0, 10.0]
    members[1]._centroid = [0.0, 2.0]
    two_way._partition()
    cornelltest.assert_equals(set([2, 3]),
                              set(two_way.getClusters()[0].getIndices()))
    cornelltest.assert_equals(set([0, 1]),
                              set(two_way.getClusters()[1].getIndices()))

    # Try it on a file: expected member indices, one list per cluster
    expected_indices = (
        [2, 3, 5, 9, 11, 15, 16, 18, 19, 20, 22, 23, 29, 30, 32, 33, 37, 40,
         41, 42, 44, 45, 50, 60, 61, 62, 64, 69, 71, 73, 75, 76, 78, 80, 85,
         88, 90, 94, 97],
        [0, 34, 8, 43, 66, 46, 77, 84, 54],
        [1, 4, 6, 7, 10, 12, 13, 14, 17, 21, 24, 25, 26, 27, 28, 31, 35, 36,
         38, 39, 47, 48, 49, 51, 52, 53, 55, 56, 57, 58, 59, 63, 65, 67, 68,
         70, 72, 74, 79, 81, 82, 83, 86, 87, 89, 91, 92, 93, 95, 96, 98, 99],
    )
    km3 = candy_to_kmeans('datasets/smallcandy.csv', 3, [23, 54, 36])
    km3._partition()
    for position, indices in enumerate(expected_indices):
        cornelltest.assert_equals(set(indices),
                                  set(km3.getClusters()[position].getIndices()))
    print(' Method ClusterGroup._partition() looks okay')

    print(' Part B of class ClusterGroup appears correct')
    print('')
def test_kmeans_c():
    """Test Part C of the ClusterGroup class.

    Covers ClusterGroup._update(), Cluster.updateCentroid() and
    ClusterGroup.step() on small hand-made datasets.
    """
    print(' Testing Part C of class ClusterGroup')

    def check_group(group, expectations):
        # For each cluster position, in order, compare the centroid and the
        # set of member indices with the expected (centroid, indices) pair.
        for position, (centroid, indices) in enumerate(expectations):
            member = group.getClusters()[position]
            cornelltest.assert_float_lists_equal(centroid, member.getCentroid())
            cornelltest.assert_equals(set(indices), set(member.getIndices()))

    items = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6.Dataset(2, items)
    km1 = a6.ClusterGroup(dset, 2, [0, 2])
    km1._partition()

    # Test update(): the first call moves the centroids and is not stable;
    # updating again should not change anything, but should return stable.
    for verdict in (cornelltest.assert_false, cornelltest.assert_true):
        stable = km1._update()
        cornelltest.assert_float_lists_equal([0, 4.5],
                                             km1.getClusters()[0].getCentroid())
        cornelltest.assert_float_lists_equal([10.0, 5.5],
                                             km1.getClusters()[1].getCentroid())
        verdict(stable)
    print(' Method ClusterGroup._update() looks okay')

    # Now test the k-means process itself.

    # FOR ALL TEST CASES
    # Create and initialize a non-empty dataset
    items = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
             [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dset = a6.Dataset(3, items)

    # Non-random seed indices keep the test deterministic
    km2 = a6.ClusterGroup(dset, 2, [1, 3])

    # PRE-TEST: both clusters start at their seed point with no members
    # (should be okay if passed part D)
    check_group(km2, (([0.5, 0.6, 0.6], []),
                      ([0.5, 0.6, 0.5], [])))

    # Make a fake cluster to test the updateCentroid() method
    scratch = a6.Cluster(dset, [0.5, 0.6, 0.6])
    for ind in [1, 2]:
        scratch.addIndex(ind)

    # TEST CASES 1 and 2 (update): not yet stable, then stable
    for verdict in (cornelltest.assert_false, cornelltest.assert_true):
        stable = scratch.updateCentroid()
        cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                             scratch.getCentroid())
        verdict(stable)

    # TEST CASE 3 (step): both clusters HAVE CHANGED
    km2.step()
    check_group(km2, (([0.55, 0.55, 0.6], [1, 2]),
                      ([0.5, 0.475, 0.475], [0, 3, 4, 5])))

    # TEST CASE 4 (second step): both clusters HAVE CHANGED again
    km2.step()
    check_group(km2, (([8. / 15, 17. / 30, 17. / 30], [1, 2, 3]),
                      ([0.5, 13. / 30, 14. / 30], [0, 4, 5])))
    print(' Method ClusterGroup.step looks okay')

    print(' Part C of class ClusterGroup appears correct')
    print('')
def test_word_list_to_pmap():
    """Test function word_list_to_pmap.

    Builds prefix maps from an empty word list, from a single two-letter
    word, and from several words (one of which is a prefix of another), and
    checks the number of keys and the list stored under each expected key.

    Every expected key is checked for membership before it is looked up, so
    a missing key is reported as a failed assertion rather than surfacing
    as a KeyError.
    """
    print('Testing function word_list_to_pmap')

    # Start with an empty word list: the result should be an empty dict.
    pmap = a4.word_list_to_pmap([])
    cornelltest.assert_equals(dict, type(pmap))
    cornelltest.assert_equals(0, len(pmap))

    # Each case pairs a word list with the (key, expected list) entries the
    # resulting map should contain.  Same test format as for pmap_add_word.
    cases = [
        # One word, two letters
        (['at'],
         [('', ['a']), ('a', ['t']), ('at', [''])]),
        # Several words; 'a' is both a word and a prefix of 'at'
        (['at', 'by', 'a'],
         [('', ['a', 'b']), ('a', ['', 't']), ('at', ['']),
          ('b', ['y']), ('by', [''])]),
    ]
    for words, expected in cases:
        pmap = a4.word_list_to_pmap(words)
        # The map should hold exactly the expected keys and no others.
        cornelltest.assert_equals(len(expected), len(pmap))
        for key, letters in expected:
            cornelltest.assert_true(key in pmap)
            assert_lists_equal(letters, pmap[key])
def testE():
    """Test Part E (of Part I) of the assignment.

    Part E is the final part of K-means: the update() method of a cluster
    and the step() method of the database.  As in the test for Part D, the
    random number generator is seeded first so that it behaves
    deterministically.
    """
    print(' Testing Part E')

    # Force the random number generator to not be random.  A seed of 3
    # gives a more interesting result than a seed of 1.
    random.seed(3)

    # FOR ALL TEST CASES
    # Create and initialize a non-empty database with two clusters.
    items = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
             [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dbase = Database(3, items)
    dbase.setKSize(2)

    # PRE-TEST: centroid and contents of both clusters before any step
    # (should be okay if Part D passed).
    before_step = [
        ([0.5, 0.6, 0.6], items[1:3]),
        ([0.5, 0.6, 0.5], [items[0]] + items[3:]),
    ]
    clusters = []
    for pos, (centroid, contents) in enumerate(before_step):
        cluster = dbase.getCluster(pos)
        cornelltest.assert_float_lists_equal(centroid, cluster.getCentroid())
        cornelltest.assert_float_lists_equal(contents, cluster.getContents())
        clusters.append(cluster)

    # Make a copy of the first cluster so update() can be tested without
    # touching the database's own cluster.
    original = clusters[0]
    clustertest = Cluster(original.getCentroid())
    for point in original.getContents():
        clustertest.appendContents(point)

    # TEST CASES 1 and 2 (update): the first call moves the centroid and is
    # not yet stable; the second leaves it in place and is stable.
    for expect_stable in (False, True):
        stable = clustertest.update()
        cornelltest.assert_float_lists_equal(
            [0.55, 0.55, 0.6], clustertest.getCentroid())
        if expect_stable:
            cornelltest.assert_true(stable)
        else:
            cornelltest.assert_false(stable)
    print(' Method update() looks okay')

    # TEST CASE 3 (step)
    dbase.step()

    # K size should be unchanged
    cornelltest.assert_equals(2, dbase.getKSize())

    # Both clusters HAVE CHANGED: check the new centroid and contents.
    after_step = [
        ([0.55, 0.55, 0.6], items[1:4]),
        ([0.5, 0.475, 0.475], [items[0]] + items[4:]),
    ]
    for pos, (centroid, contents) in enumerate(after_step):
        cluster = dbase.getCluster(pos)
        cornelltest.assert_float_lists_equal(centroid, cluster.getCentroid())
        cornelltest.assert_float_lists_equal(contents, cluster.getContents())
    print(' Method step() looks okay')

    print(' Part E appears correct')
def test_pmap_add_word():
    """Test function pmap_add_word.

    Adds words one at a time to a single, initially empty prefix map.  After
    each of the first three words the full set of keys and their lists is
    verified; the last two words get spot checks only.
    """
    print('Testing pmap_add_word')

    # Start with an empty prefix map
    pmap = {}

    # Each entry is a word to add, followed by every (key, expected list)
    # pair the map should hold once that word is in.
    # Note use of helper assert_lists_equal to make this test easier to read.
    stages = [
        # A single letter: two keys, the empty string and 'a'
        ('a',
         [('', ['a']), ('a', [''])]),
        # Add something with two letters
        ('by',
         [('', ['a', 'b']), ('a', ['']), ('b', ['y']), ('by', [''])]),
        # One last time with overlap ('at' shares the prefix 'a')
        ('at',
         [('', ['a', 'b']), ('a', ['', 't']), ('at', ['']),
          ('b', ['y']), ('by', [''])]),
    ]
    for word, expected in stages:
        a4.pmap_add_word(pmap, word)
        # Verify the keys: exactly the expected ones, each with its list.
        cornelltest.assert_equals(len(expected), len(pmap))
        for key, letters in expected:
            cornelltest.assert_true(key in pmap)
            assert_lists_equal(letters, pmap[key])

    # Spot check: a longer word ends up as a key whose list contains ''.
    a4.pmap_add_word(pmap, 'that')
    cornelltest.assert_true('that' in pmap)
    cornelltest.assert_true('' in pmap['that'])

    # Spot check: a repeated letter ('oo') appears under the right prefixes.
    a4.pmap_add_word(pmap, 'goop')
    cornelltest.assert_true('goop' in pmap)
    cornelltest.assert_true('o' in pmap['go'])
    cornelltest.assert_true('p' in pmap['goo'])