def testE():
    """Test Part E (of Part I) of the assignment.

    Exercises Cluster.update() and Database.step() on a six-point,
    three-dimensional database split into two clusters.  random.seed(3) is
    called before the clusters are created, so the expected centroids and
    contents below are tied to that seed.
    """
    print(' Testing Part E')

    # Pin the random number generator.  (Seed 3 was chosen over seed 1
    # because it gives a more interesting result.)
    random.seed(3)

    # Shared fixture for every case below.
    points = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
              [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dbase = Database(3, points)
    dbase.setKSize(2)

    # PRE-TEST: both clusters must start in the state Part D produces.
    first = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6], first.getCentroid())
    cornelltest.assert_float_lists_equal(points[1:3], first.getContents())

    second = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5], second.getCentroid())
    cornelltest.assert_float_lists_equal([points[0]] + points[3:],
                                         second.getContents())

    # Build a stand-alone replica of the first cluster so update() can be
    # tested without touching the database's own clusters.
    replica = Cluster(first.getCentroid())
    for member in first.getContents():
        replica.appendContents(member)

    # TEST CASES 1 and 2 (update): the first call moves the centroid and must
    # report "not stable"; the second call leaves it in place and must report
    # "stable".
    for check_stability in (cornelltest.assert_false, cornelltest.assert_true):
        stable = replica.update()
        cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                             replica.getCentroid())
        check_stability(stable)
    print(' Method update() looks okay')

    # TEST CASE 3 (step): one step must keep k and change both clusters.
    dbase.step()
    cornelltest.assert_equals(2, dbase.getKSize())

    first = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6], first.getCentroid())
    cornelltest.assert_float_lists_equal(points[1:4], first.getContents())

    second = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.475, 0.475],
                                         second.getCentroid())
    cornelltest.assert_float_lists_equal([points[0]] + points[4:],
                                         second.getContents())
    print(' Method step() looks okay')
    print(' Part E appears correct')
def test_dataset():
    """Test the Dataset class.

    Covers construction (with and without initial contents), the getters
    getDimension/getSize/getContents/getPoint, and addPoint.  Several checks
    use object identity to confirm that points are copied rather than shared.
    """
    print(' Testing class Dataset')

    # TEST CASE 1: a dataset created without contents.
    empty = a6.Dataset(3)
    cornelltest.assert_equals(3, empty.getDimension())
    cornelltest.assert_equals(0, empty.getSize())
    # assert_float_lists_equal is the assert used to compare lists of floats.
    cornelltest.assert_float_lists_equal([], empty.getContents())
    print(' Default initialization looks okay')

    # TEST CASE 2: a dataset created from a list of points.
    seed_points = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0],
                   [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
    filled = a6.Dataset(3, seed_points)
    cornelltest.assert_equals(3, filled.getDimension())
    cornelltest.assert_equals(4, filled.getSize())

    # Same values, but neither the outer list nor an inner point may be the
    # caller's own object.
    cornelltest.assert_float_lists_equal(seed_points, filled.getContents())
    cornelltest.assert_false(filled.getContents() is seed_points)
    cornelltest.assert_false(filled.getContents()[0] is seed_points[0])
    print(' User-provided initialization looks okay')

    # getPoint() must return the right values AND a different object from
    # the one found in getContents().
    cornelltest.assert_float_lists_equal([0.0, 1.0, 0.0], filled.getPoint(2))
    from_contents = filled.getContents()[2]
    from_getter = filled.getPoint(2)
    cornelltest.assert_false(from_contents is from_getter)
    print(' Method Dataset.getPoint looks okay')

    # addPoint() on the empty dataset.
    empty.addPoint([0.0, 0.5, 4.2])
    cornelltest.assert_float_lists_equal([[0.0, 0.5, 4.2]], empty.getContents())
    cornelltest.assert_float_lists_equal([0.0, 0.5, 4.2], empty.getPoint(0))
    from_getter = empty.getPoint(0)
    from_contents = empty.getContents()[0]
    cornelltest.assert_false(from_getter is from_contents)

    # addPoint() on the non-empty dataset; mirror the change in seed_points
    # so the two can be compared directly.
    extra = [0.0, 0.5, 4.2]
    filled.addPoint(extra)
    seed_points.append(extra)
    cornelltest.assert_float_lists_equal(seed_points, filled.getContents())
    # The object we passed in must not appear anywhere in the contents.
    cornelltest.assert_false(
        any(stored is extra for stored in filled.getContents()))
    print(' Method Dataset.addPoint looks okay')
    print(' class Dataset appears correct')
    print('')
def test_dataset():
    """Test the Dataset class.

    Checks the constructor (empty and pre-filled), the getters
    getDimension/getSize/getContents/getPoint, and addPoint.  Identity
    comparisons verify that the class stores and returns copies of points.
    """
    print(' Testing class Dataset')

    # TEST CASE 1: no initial contents.
    blank = a6.Dataset(3)
    cornelltest.assert_equals(3, blank.getDimension())
    cornelltest.assert_equals(0, blank.getSize())
    # Lists of floats are compared with assert_float_lists_equal.
    cornelltest.assert_float_lists_equal([], blank.getContents())
    print(' Default initialization looks okay')

    # TEST CASE 2: four initial points.
    originals = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0],
                 [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
    loaded = a6.Dataset(3, originals)
    cornelltest.assert_equals(3, loaded.getDimension())
    cornelltest.assert_equals(4, loaded.getSize())

    # Values must match while the list and its first point are distinct
    # objects from the ones passed to the constructor.
    cornelltest.assert_float_lists_equal(originals, loaded.getContents())
    cornelltest.assert_false(loaded.getContents() is originals)
    cornelltest.assert_false(loaded.getContents()[0] is originals[0])
    print(' User-provided initialization looks okay')

    # getPoint(): right values, and not the same object that getContents()
    # exposes at that index.
    cornelltest.assert_float_lists_equal([0.0, 1.0, 0.0], loaded.getPoint(2))
    via_contents = loaded.getContents()[2]
    via_point = loaded.getPoint(2)
    cornelltest.assert_false(via_contents is via_point)
    print(' Method Dataset.getPoint looks okay')

    # addPoint() into the blank dataset.
    blank.addPoint([0.0, 0.5, 4.2])
    cornelltest.assert_float_lists_equal([[0.0, 0.5, 4.2]], blank.getContents())
    cornelltest.assert_float_lists_equal([0.0, 0.5, 4.2], blank.getPoint(0))
    via_point = blank.getPoint(0)
    via_contents = blank.getContents()[0]
    cornelltest.assert_false(via_point is via_contents)

    # addPoint() into the loaded dataset; originals is extended in step so
    # it remains the expected value.
    extra = [0.0, 0.5, 4.2]
    loaded.addPoint(extra)
    originals.append(extra)
    cornelltest.assert_float_lists_equal(originals, loaded.getContents())
    # None of the stored points may be the very object we handed over.
    cornelltest.assert_false(
        any(kept is extra for kept in loaded.getContents()))
    print(' Method Dataset.addPoint looks okay')
    print(' class Dataset appears correct')
    print('')
def testA():
    """Test Part A (of Part I) of the assignment.

    Checks Database construction (empty and from a list of points), the
    getters getDimension/getKSize/getContents, and appendContents /
    clearContents.  getCluster is not tested here because no clusters exist
    yet; that test is part of Part B.
    """
    print(' Testing Part A')

    # TEST CASE 1: a database created without contents.
    empty_db = Database(3)
    cornelltest.assert_equals(3, empty_db.getDimension())
    cornelltest.assert_equals(0, empty_db.getKSize())
    # assert_float_lists_equal is the assert used to compare lists of floats.
    cornelltest.assert_float_lists_equal([], empty_db.getContents())

    # Append one point; the contents are a 2D list, hence the nesting.
    empty_db.appendContents([0.0, 0.5, 4.2])
    cornelltest.assert_float_lists_equal([[0.0, 0.5, 4.2]],
                                         empty_db.getContents())
    # Clearing must bring the contents back to the empty list.
    empty_db.clearContents()
    cornelltest.assert_float_lists_equal([], empty_db.getContents())
    print(' Default initialization looks okay')

    # TEST CASE 2: a database created from four points.
    seed_points = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0],
                   [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
    full_db = Database(3, seed_points)
    cornelltest.assert_equals(3, full_db.getDimension())
    cornelltest.assert_equals(0, full_db.getKSize())

    # Same values as seed_points, but not the same list object.
    cornelltest.assert_float_lists_equal(seed_points, full_db.getContents())
    cornelltest.assert_not_equals(id(seed_points), id(full_db.getContents()))

    # Append a point, mirroring the change in seed_points for comparison.
    extra = [0.0, 0.5, 4.2]
    full_db.appendContents(extra)
    seed_points.append(extra)
    cornelltest.assert_float_lists_equal(seed_points, full_db.getContents())
    # The object we passed in must not appear anywhere in the contents.
    cornelltest.assert_false(
        any(stored is extra for stored in full_db.getContents()))

    full_db.clearContents()
    cornelltest.assert_float_lists_equal([], full_db.getContents())
    print(' User-given contents looks okay')
    print(' Part A appears correct')
def testB():
    """Test Part B (of Part I) of the assignment.

    Covers the basic Cluster methods (getCentroid, getContents,
    appendContents, clearContents) and Database.getCluster, whose test was
    deferred from Part A.

    getCluster can only be exercised once a database holds clusters, so this
    procedure assigns the hidden attributes _clusters and _ksize directly.
    Touching hidden attributes from outside the class is normally bad
    programming; it is tolerated here because this is test code.
    """
    print(' Testing Part B')

    # TEST CASE 1: a freshly created cluster (always empty).
    point = [0.0, 1.0, 0.0]
    cluster1 = Cluster(point)

    cornelltest.assert_float_lists_equal(point, cluster1.getCentroid())
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    # The centroid must be a COPY of point.  Compare against getCentroid(),
    # not getContents(): the contents list can never be the same object as
    # point, so checking it would pass no matter what the constructor does.
    cornelltest.assert_not_equals(id(point), id(cluster1.getCentroid()))

    # Append one point; the contents are a 2D list, hence [extra].
    extra = [0.0, 0.5, 4.2]
    cluster1.appendContents(extra)
    cornelltest.assert_float_lists_equal([extra], cluster1.getContents())
    # The object we passed in must not appear anywhere in the contents.
    cornelltest.assert_false(id(extra) in map(id, cluster1.getContents()))

    # Clearing must bring the contents back to the empty list.
    cluster1.clearContents()
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    print(' Basic cluster methods look okay')

    # TEST CASE 2 (getCluster): install two known clusters by hand.
    cluster2 = Cluster([0.0, 0.0, 0.0])
    dbase = Database(3)
    # THIS VIOLATES GOOD PROGRAMMING, but there is no other way to set up
    # clusters at this stage of the assignment.
    dbase._clusters = [cluster1, cluster2]
    dbase._ksize = 2

    # getCluster must hand back the very same objects, so compare identities
    # (via id) rather than values.
    cornelltest.assert_equals(id(cluster1), id(dbase.getCluster(0)))
    cornelltest.assert_equals(id(cluster2), id(dbase.getCluster(1)))
    print(' Method getCluster() looks okay')
    print(' Part B appears correct')
def testB():
    """Test Part B (of Part I) of the assignment.

    Exercises the basic Cluster methods (getCentroid, getContents,
    appendContents, clearContents) together with Database.getCluster, whose
    test was moved here from Part A.

    To test getCluster the database needs clusters, so the hidden attributes
    _clusters and _ksize are set directly from this function.  Accessing
    hidden attributes outside the class definition is normally bad
    programming; testing is the one place where the rule is broken.
    """
    print(' Testing Part B')

    # TEST CASE 1: a new cluster starts out empty.
    point = [0.0, 1.0, 0.0]
    cluster1 = Cluster(point)

    cornelltest.assert_float_lists_equal(point, cluster1.getCentroid())
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    # Make sure the centroid was COPIED.  The comparison has to use
    # getCentroid(); comparing id(point) with the contents list would always
    # succeed, because that list is never the same object as point.
    cornelltest.assert_not_equals(id(point), id(cluster1.getCentroid()))

    # Add a point to the cluster (contents are a 2D list, hence [extra]).
    extra = [0.0, 0.5, 4.2]
    cluster1.appendContents(extra)
    cornelltest.assert_float_lists_equal([extra], cluster1.getContents())
    # The stored point must be a copy, not the object passed in.
    cornelltest.assert_false(id(extra) in map(id, cluster1.getContents()))

    # And clear it again.
    cluster1.clearContents()
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    print(' Basic cluster methods look okay')

    # TEST CASE 2 (getCluster): place two known clusters in a database.
    cluster2 = Cluster([0.0, 0.0, 0.0])
    dbase = Database(3)
    # THIS VIOLATES GOOD PROGRAMMING, but no public method can install
    # clusters at this point in the assignment.
    dbase._clusters = [cluster1, cluster2]
    dbase._ksize = 2

    # The exact same objects must come back, so compare identities with id.
    cornelltest.assert_equals(id(cluster1), id(dbase.getCluster(0)))
    cornelltest.assert_equals(id(cluster2), id(dbase.getCluster(1)))
    print(' Method getCluster() looks okay')
    print(' Part B appears correct')
def testA():
    """Test Part A (of Part I) of the assignment.

    Verifies the Database constructor (with and without initial contents),
    the getters getDimension/getKSize/getContents, and the mutators
    appendContents/clearContents.  getCluster cannot be tested yet because
    there are no clusters; that test is found in Part B.
    """
    print(' Testing Part A')

    # TEST CASE 1: no initial contents.
    blank_db = Database(3)
    cornelltest.assert_equals(3, blank_db.getDimension())
    cornelltest.assert_equals(0, blank_db.getKSize())
    # Lists of floats are compared with assert_float_lists_equal.
    cornelltest.assert_float_lists_equal([], blank_db.getContents())

    # One appended point shows up as a one-row 2D list.
    blank_db.appendContents([0.0, 0.5, 4.2])
    cornelltest.assert_float_lists_equal([[0.0, 0.5, 4.2]],
                                         blank_db.getContents())
    # clearContents() must empty the database again.
    blank_db.clearContents()
    cornelltest.assert_float_lists_equal([], blank_db.getContents())
    print(' Default initialization looks okay')

    # TEST CASE 2: four initial points.
    originals = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0],
                 [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
    loaded_db = Database(3, originals)
    cornelltest.assert_equals(3, loaded_db.getDimension())
    cornelltest.assert_equals(0, loaded_db.getKSize())

    # Equal values, different list object: the constructor must copy.
    cornelltest.assert_float_lists_equal(originals, loaded_db.getContents())
    cornelltest.assert_not_equals(id(originals), id(loaded_db.getContents()))

    # Append a point; originals is extended in step so it stays the
    # expected value.
    extra = [0.0, 0.5, 4.2]
    loaded_db.appendContents(extra)
    originals.append(extra)
    cornelltest.assert_float_lists_equal(originals, loaded_db.getContents())
    # None of the stored points may be the very object we handed over.
    cornelltest.assert_false(
        any(kept is extra for kept in loaded_db.getContents()))

    loaded_db.clearContents()
    cornelltest.assert_float_lists_equal([], loaded_db.getContents())
    print(' User-given contents looks okay')
    print(' Part A appears correct')
def test_cluster_b():
    """Test Part B of the Cluster class assignment.

    Checks Cluster.distance() against three hand-computed distances and
    Cluster.updateCentroid() for both its new centroid and the stability
    flag it returns.
    """
    print(' Testing Part B of class Cluster')

    # Four points in three dimensions, shared by both clusters.
    points = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0],
              [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
    dset = a6.Dataset(3, points)
    cluster2 = a6.Cluster(dset, [0.5, 0.5, 0.0])
    cluster3 = a6.Cluster(dset, [0.0, 0.0, 0.5])

    # TEST CASES 1-3 (distance): (cluster, probe point, expected distance).
    # The middle case probes the centroid itself, so the distance is zero.
    distance_cases = [
        (cluster2, [1.0, 0.0, -1.0], 1.22474487139),
        (cluster2, [0.5, 0.5, 0.0], 0.0),
        (cluster3, [0.5, 0.0, 0.5], 0.5),
    ]
    for cluster, probe, expected in distance_cases:
        dist = cluster.distance(probe)
        cornelltest.assert_floats_equal(expected, dist)
    print(' Method Cluster.distance() looks okay')

    # TEST CASE 1 (updateCentroid): points 0 and 1 average to the current
    # centroid, so nothing moves and the cluster is stable.
    for index in (0, 1):
        cluster2.addIndex(index)
    stable = cluster2.updateCentroid()
    cornelltest.assert_float_lists_equal([0.5, 0.5, 0.0], cluster2.getCentroid())
    cornelltest.assert_true(stable)

    # TEST CASE 2 (updateCentroid): adding points 2 and 3 moves the centroid,
    # so the cluster is not stable.
    for index in (2, 3):
        cluster2.addIndex(index)
    stable = cluster2.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25],
                                         cluster2.getCentroid())
    cornelltest.assert_false(stable)

    # Updating again with the same points leaves the centroid in place.
    stable = cluster2.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25],
                                         cluster2.getCentroid())
    cornelltest.assert_true(stable)
    print(' Method Cluster.updateCentroid() looks okay')
    print(' Part B of class Cluster appears correct')
    print('')
def test_cluster_b():
    """Test Part B of the Cluster class assignment.

    Verifies Cluster.distance() on three known point/centroid pairs, then
    Cluster.updateCentroid(), checking both the recomputed centroid and the
    returned stability flag.
    """
    print(' Testing Part B of class Cluster')

    # A dataset of four 3-D points backing both clusters.
    data = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
    dset = a6.Dataset(3, data)
    cluster2 = a6.Cluster(dset, [0.5, 0.5, 0.0])
    cluster3 = a6.Cluster(dset, [0.0, 0.0, 0.5])

    # TEST CASES 1-3 (distance), as (cluster, point, expected) triples.
    # The second triple measures from the centroid to itself.
    triples = (
        (cluster2, [1.0, 0.0, -1.0], 1.22474487139),
        (cluster2, [0.5, 0.5, 0.0], 0.0),
        (cluster3, [0.5, 0.0, 0.5], 0.5),
    )
    for owner, target, wanted in triples:
        measured = owner.distance(target)
        cornelltest.assert_floats_equal(wanted, measured)
    print(' Method Cluster.distance() looks okay')

    # TEST CASE 1 (updateCentroid): the mean of points 0 and 1 equals the
    # starting centroid, so the update reports stable.
    cluster2.addIndex(0)
    cluster2.addIndex(1)
    unchanged = cluster2.updateCentroid()
    cornelltest.assert_float_lists_equal([0.5, 0.5, 0.0], cluster2.getCentroid())
    cornelltest.assert_true(unchanged)

    # TEST CASE 2 (updateCentroid): with all four points the mean shifts to
    # 0.25 on every axis, so the first update reports not stable and a
    # repeat update (no new points) reports stable.
    cluster2.addIndex(2)
    cluster2.addIndex(3)
    for check_stability in (cornelltest.assert_false, cornelltest.assert_true):
        unchanged = cluster2.updateCentroid()
        cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25],
                                             cluster2.getCentroid())
        check_stability(unchanged)
    print(' Method Cluster.updateCentroid() looks okay')
    print(' Part B of class Cluster appears correct')
    print('')
def test_pmap_has_word():
    """Test function pmap_has_word.

    Checks an empty prefix map first, then a hand-built map in which 'a' and
    'at' are expected to be words while '' and 'by' are not.
    """
    print('Testing function pmap_has_word')

    # An empty prefix map contains no words at all.
    cornelltest.assert_false(a4.pmap_has_word({}, 'a'))

    # A prefix map written out by hand.
    prefixes = {'': ['a'], 'a': ['', 't'], 'at': ['']}

    # The empty string is a key of the map but is NOT a word; 'by' is not a
    # key at all.
    for absent in ('', 'by'):
        cornelltest.assert_false(a4.pmap_has_word(prefixes, absent))

    for present in ('a', 'at'):
        cornelltest.assert_true(a4.pmap_has_word(prefixes, present))
def test_pmap_has_word():
    """Test function pmap_has_word.

    Uses an empty prefix map and then a manually built one; the expected
    words in the latter are 'a' and 'at', while '' and 'by' must be rejected.
    """
    print('Testing function pmap_has_word')

    # Nothing is a word in an empty prefix map.
    no_prefixes = {}
    cornelltest.assert_false(a4.pmap_has_word(no_prefixes, 'a'))

    # Prefix map built manually for the remaining checks.
    manual = {
        '': ['a'],
        'a': ['', 't'],
        'at': [''],
    }

    # (candidate, assert to apply).  '' appears as a key but is NOT A WORD.
    expectations = (
        ('', cornelltest.assert_false),
        ('by', cornelltest.assert_false),
        ('a', cornelltest.assert_true),
        ('at', cornelltest.assert_true),
    )
    for candidate, verify in expectations:
        verify(a4.pmap_has_word(manual, candidate))
def testE():
    """Test Part E (of Part I) of the assignment.

    Covers the last piece of K-means: Cluster.update() and Database.step().
    As in the test for Part D, random.seed is called first so that the
    clusters created by setKSize are reproducible; every expected value
    below depends on that seed.
    """
    print(' Testing Part E')

    # Make the random number generator deterministic.  Seed 3 gives a more
    # interesting result than seed 1.
    random.seed(3)

    # Fixture used by all test cases: six 3-D points, two clusters.
    data = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
            [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dbase = Database(3, data)
    dbase.setKSize(2)

    # PRE-TEST: the starting clusters (these pass if Part D passed).
    cluster_a = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6],
                                         cluster_a.getCentroid())
    cornelltest.assert_float_lists_equal(data[1:3], cluster_a.getContents())

    cluster_b = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5],
                                         cluster_b.getCentroid())
    cornelltest.assert_float_lists_equal([data[0]] + data[3:],
                                         cluster_b.getContents())

    # Copy the first cluster into a separate object so that update() can be
    # called without disturbing the database.
    clone = Cluster(cluster_a.getCentroid())
    for pt in cluster_a.getContents():
        clone.appendContents(pt)

    # TEST CASE 1 (update): centroid moves, so the cluster is not yet stable.
    settled = clone.update()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6], clone.getCentroid())
    cornelltest.assert_false(settled)

    # TEST CASE 2 (update): same contents, so the centroid stays put and the
    # cluster is now stable.
    settled = clone.update()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6], clone.getCentroid())
    cornelltest.assert_true(settled)
    print(' Method update() looks okay')

    # TEST CASE 3 (step): k is unchanged, but both clusters have changed.
    dbase.step()
    cornelltest.assert_equals(2, dbase.getKSize())

    cluster_a = dbase.getCluster(0)
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         cluster_a.getCentroid())
    cornelltest.assert_float_lists_equal(data[1:4], cluster_a.getContents())

    cluster_b = dbase.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.475, 0.475],
                                         cluster_b.getCentroid())
    cornelltest.assert_float_lists_equal([data[0]] + data[4:],
                                         cluster_b.getContents())
    print(' Method step() looks okay')
    print(' Part E appears correct')
def test_kmeans_c():
    """Test Part C of the ClusterGroup class.

    Three things are exercised, in order:
      1. ClusterGroup._update() on a four-point, 2-D dataset.
      2. Cluster.updateCentroid() on a stand-alone cluster.
      3. Two consecutive calls of ClusterGroup.step() on a six-point, 3-D
         dataset.
    Seed indices are passed explicitly to ClusterGroup so every expected
    value is deterministic.
    """
    print(' Testing Part C of class ClusterGroup')

    # --- _update() -------------------------------------------------------
    corners = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6.Dataset(2, corners)
    km1 = a6.ClusterGroup(dset, 2, [0, 2])
    km1._partition()

    # The first update moves both centroids (not stable); the second finds
    # nothing to change and must report stable.
    for check_stability in (cornelltest.assert_false, cornelltest.assert_true):
        stable = km1._update()
        cornelltest.assert_float_lists_equal(
            [0, 4.5], km1.getClusters()[0].getCentroid())
        cornelltest.assert_float_lists_equal(
            [10.0, 5.5], km1.getClusters()[1].getCentroid())
        check_stability(stable)
    print(' Method ClusterGroup._update() looks okay')

    # --- the k-means process itself --------------------------------------
    points = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
              [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dset = a6.Dataset(3, points)
    # Seeds are points 1 and 3, given explicitly rather than drawn at random.
    km2 = a6.ClusterGroup(dset, 2, [1, 3])

    # PRE-TEST: before any step, each cluster has its seed as centroid and
    # holds no indices.
    first = km2.getClusters()[0]
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6], first.getCentroid())
    cornelltest.assert_equals(set([]), set(first.getIndices()))

    second = km2.getClusters()[1]
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5], second.getCentroid())
    cornelltest.assert_equals(set([]), set(second.getIndices()))

    # A hand-made cluster holding points 1 and 2, used to test
    # updateCentroid() in isolation.
    solo = a6.Cluster(dset, [0.5, 0.6, 0.6])
    for index in [1, 2]:
        solo.addIndex(index)

    # First update moves the centroid (not stable); second leaves it (stable).
    for check_stability in (cornelltest.assert_false, cornelltest.assert_true):
        stable = solo.updateCentroid()
        cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                             solo.getCentroid())
        check_stability(stable)

    # Expected state after each of two step() calls, as
    # (centroid 0, indices 0, centroid 1, indices 1).  Both clusters change
    # on both steps.
    after_each_step = [
        ([0.55, 0.55, 0.6], set([1, 2]),
         [0.5, 0.475, 0.475], set([0, 3, 4, 5])),
        ([8. / 15, 17. / 30, 17. / 30], set([1, 2, 3]),
         [0.5, 13. / 30, 14. / 30], set([0, 4, 5])),
    ]
    for centroid0, indices0, centroid1, indices1 in after_each_step:
        km2.step()

        first = km2.getClusters()[0]
        cornelltest.assert_float_lists_equal(centroid0, first.getCentroid())
        cornelltest.assert_equals(indices0, set(first.getIndices()))

        second = km2.getClusters()[1]
        cornelltest.assert_float_lists_equal(centroid1, second.getCentroid())
        cornelltest.assert_equals(indices1, set(second.getIndices()))

    print(' Method ClusterGroup.step looks okay')
    print(' Part C of class ClusterGroup appears correct')
    print('')
def test_kmeans_c():
    """Test Part C of the ClusterGroup class.

    Four things are exercised, in order:
      1. ClusterGroup._update() on a four-point, 2-D dataset.
      2. Cluster.updateCentroid() on a stand-alone cluster.
      3. Two consecutive calls of ClusterGroup.step() on a six-point, 3-D
         dataset.
      4. One call of step() on the object returned by
         candy_to_kmeans('datasets/smallcandy.csv', 3, [23, 54, 36]),
         compared against recorded centroids and contents.
    Seed indices are passed explicitly everywhere so the expected values are
    deterministic.
    """
    print(' Testing Part C of class ClusterGroup')

    # --- _update() -------------------------------------------------------
    corners = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6.Dataset(2, corners)
    km1 = a6.ClusterGroup(dset, 2, [0, 2])
    km1._partition()

    # The first update moves both centroids (not stable); the second finds
    # nothing to change and must report stable.
    for check_stability in (cornelltest.assert_false, cornelltest.assert_true):
        stable = km1._update()
        cornelltest.assert_float_lists_equal(
            [0, 4.5], km1.getClusters()[0].getCentroid())
        cornelltest.assert_float_lists_equal(
            [10.0, 5.5], km1.getClusters()[1].getCentroid())
        check_stability(stable)
    print(' Method ClusterGroup._update() looks okay')

    # --- the k-means process itself --------------------------------------
    points = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
              [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dset = a6.Dataset(3, points)
    # Seeds are points 1 and 3, given explicitly rather than drawn at random.
    km2 = a6.ClusterGroup(dset, 2, [1, 3])

    # PRE-TEST: before any step, each cluster has its seed as centroid and
    # holds no indices.
    first = km2.getClusters()[0]
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6], first.getCentroid())
    cornelltest.assert_equals(set([]), set(first.getIndices()))

    second = km2.getClusters()[1]
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5], second.getCentroid())
    cornelltest.assert_equals(set([]), set(second.getIndices()))

    # A hand-made cluster holding points 1 and 2, used to test
    # updateCentroid() in isolation.
    solo = a6.Cluster(dset, [0.5, 0.6, 0.6])
    for index in [1, 2]:
        solo.addIndex(index)

    # First update moves the centroid (not stable); second leaves it (stable).
    for check_stability in (cornelltest.assert_false, cornelltest.assert_true):
        stable = solo.updateCentroid()
        cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                             solo.getCentroid())
        check_stability(stable)

    # Expected state after each of two step() calls, as
    # (centroid 0, indices 0, centroid 1, indices 1).  Both clusters change
    # on both steps.
    after_each_step = [
        ([0.55, 0.55, 0.6], set([1, 2]),
         [0.5, 0.475, 0.475], set([0, 3, 4, 5])),
        ([8. / 15, 17. / 30, 17. / 30], set([1, 2, 3]),
         [0.5, 13. / 30, 14. / 30], set([0, 4, 5])),
    ]
    for centroid0, indices0, centroid1, indices1 in after_each_step:
        km2.step()

        first = km2.getClusters()[0]
        cornelltest.assert_float_lists_equal(centroid0, first.getCentroid())
        cornelltest.assert_equals(indices0, set(first.getIndices()))

        second = km2.getClusters()[1]
        cornelltest.assert_float_lists_equal(centroid1, second.getCentroid())
        cornelltest.assert_equals(indices1, set(second.getIndices()))

    # --- try it on a file ------------------------------------------------
    # NOTE(review): candy_to_kmeans is defined outside this block; presumably
    # it loads the CSV and returns a ClusterGroup with 3 clusters seeded at
    # the given indices -- confirm against its definition.
    km3 = candy_to_kmeans('datasets/smallcandy.csv', 3, [23, 54, 36])
    km3.step()

    # The actual results, one cluster per getClusters() lookup.
    actual = [km3.getClusters()[k] for k in range(3)]

    # The recorded "correct" contents of each cluster after one step.
    contents0 = [
        [0.88, 0.84, 0.8, 0.3], [0.02, 0.67, 0.75, 0.61], [0.2, 0.54, 0.73, 0.85],
        [0.62, 0.75, 0.65, 0.43], [0.35, 0.63, 0.65, 0.12], [0.61, 0.85, 0.81, 0.44],
        [0.95, 0.94, 0.98, 0.69], [0.04, 0.69, 0.38, 0.39], [0.04, 0.52, 0.99, 0.75],
        [0.28, 0.91, 0.63, 0.08], [0.14, 0.55, 0.67, 0.63], [0.38, 0.94, 0.53, 0.07],
        [0.08, 0.62, 0.32, 0.27], [0.69, 0.82, 0.75, 0.65], [0.84, 0.89, 0.91, 0.38],
        [0.22, 0.88, 0.39, 0.33], [0.39, 0.38, 0.85, 0.32], [0.26, 0.39, 0.95, 0.63],
        [0.15, 0.87, 0.62, 0.22], [0.65, 0.81, 0.69, 0.55], [0.27, 0.63, 0.69, 0.39],
        [0.35, 0.7, 0.41, 0.15], [0.2, 0.48, 0.98, 0.84], [0.76, 0.86, 0.74, 0.61],
        [0.27, 0.65, 0.52, 0.28], [0.86, 0.91, 0.88, 0.62], [0.1, 0.79, 0.5, 0.12],
        [0.09, 0.85, 0.55, 0.21], [0.79, 0.94, 0.83, 0.48], [0.73, 0.92, 0.74, 0.39],
        [0.31, 0.5, 0.87, 0.85], [0.39, 0.9, 0.52, 0.26], [0.46, 0.35, 0.96, 0.05],
        [0.21, 0.62, 0.33, 0.09], [0.58, 0.37, 0.9, 0.08], [0.54, 0.92, 0.36, 0.35],
        [0.36, 0.64, 0.57, 0.26], [0.09, 0.47, 0.63, 0.8], [0.4, 0.69, 0.74, 0.7],
    ]
    contents1 = [
        [0.32, 0.87, 0.14, 0.68], [0.87, 0.99, 0.2, 0.8], [0.86, 0.86, 0.32, 0.88],
        [0.81, 0.66, 0.26, 0.82], [0.91, 0.98, 0.61, 0.58], [0.84, 0.88, 0.04, 0.86],
        [0.8, 0.62, 0.09, 0.65], [0.72, 0.88, 0.02, 0.95], [0.88, 0.96, 0.09, 0.88],
    ]
    contents2 = [
        [0.4, 0.21, 0.78, 0.68], [0.54, 0.06, 0.81, 0.98], [0.73, 0.31, 0.15, 0.08],
        [0.81, 0.69, 0.65, 0.65], [0.14, 0.31, 0.86, 0.74], [0.77, 0.45, 0.31, 0.31],
        [0.39, 0.14, 0.99, 0.24], [0.23, 0.32, 0.7, 0.75], [0.65, 0.05, 0.39, 0.49],
        [0.96, 0.09, 0.49, 0.3], [0.86, 0.03, 0.3, 0.39], [0.5, 0.2, 0.69, 0.95],
        [0.79, 0.09, 0.41, 0.69], [0.4, 0.3, 0.78, 0.74], [0.65, 0.24, 0.63, 0.27],
        [0.35, 0.3, 0.94, 0.92], [0.71, 0.78, 0.64, 0.57], [0.8, 0.4, 0.23, 0.33],
        [0.38, 0.07, 0.82, 0.01], [0.66, 0.09, 0.69, 0.46], [0.54, 0.06, 0.74, 0.86],
        [0.95, 0.62, 0.28, 0.01], [0.35, 0.71, 0.01, 0.32], [0.62, 0.24, 0.77, 0.17],
        [0.73, 0.65, 0.23, 0.02], [0.27, 0.38, 0.76, 0.63], [0.9, 0.63, 0.83, 0.6],
        [0.7, 0.04, 0.7, 0.82], [0.95, 0.83, 0.64, 0.5], [0.41, 0.11, 0.61, 0.78],
        [0.22, 0.44, 0.67, 0.99], [0.51, 0.05, 0.95, 0.66], [0.99, 0.68, 0.8, 0.42],
        [0.72, 0.55, 0.1, 0.17], [0.44, 0.1, 0.61, 0.98], [0.31, 0.16, 0.95, 0.9],
        [0.61, 0.42, 0.24, 0.33], [0.89, 0.72, 0.78, 0.38], [0.5, 0.09, 0.84, 0.78],
        [0.62, 0.01, 0.88, 0.1], [0.44, 0.28, 0.88, 0.99], [0.57, 0.23, 0.6, 0.85],
        [0.9, 0.05, 0.34, 0.41], [0.9, 0.41, 0.27, 0.36], [0.67, 0.32, 0.66, 0.2],
        [0.72, 0.14, 0.63, 0.37], [0.39, 0.08, 0.77, 0.96], [0.9, 0.7, 0.74, 0.63],
        [0.63, 0.05, 0.52, 0.63], [0.62, 0.27, 0.67, 0.77], [0.35, 0.04, 0.85, 0.86],
        [0.36, 0.34, 0.75, 0.37],
    ]

    # The recorded "correct" centroids, in cluster order.
    centroid0 = [0.3987179487179487, 0.7097435897435899,
                 0.6864102564102561, 0.4164102564102565]
    centroid1 = [0.7788888888888889, 0.8555555555555555,
                 0.19666666666666668, 0.788888888888889]
    centroid2 = [0.6038461538461538, 0.29865384615384616,
                 0.6217307692307692, 0.5455769230769231]

    # All centroids are checked first, then all contents.
    for wanted, cluster in zip([centroid0, centroid1, centroid2], actual):
        cornelltest.assert_float_lists_equal(wanted, cluster.getCentroid())
    for wanted, cluster in zip([contents0, contents1, contents2], actual):
        cornelltest.assert_float_lists_equal(wanted, cluster.getContents())

    print(' Method ClusterGroup.step looks okay')
    print(' Part C of class ClusterGroup appears correct')
    print('')