Пример #1
0
def test_best_cluster():
    """Check best_cluster against a stored answer for small_candy.txt.

    This is the only test case for the analysis portion of the assignment.
    The random number generator is seeded with 2 before the call, and the
    centroid and contents of the returned cluster are compared with the
    expected values listed below."""
    print('  Testing best_cluster on small_candy.txt')

    # Expected centroid of the returned cluster.
    expected_centroid = [0.6730769230769229, 0.26384615384615384,
                         0.5303846153846155, 0.27538461538461534]

    # Expected points of the returned cluster, in order.
    expected_contents = [
        [0.73, 0.31, 0.15, 0.08],
        [0.77, 0.45, 0.31, 0.31],
        [0.39, 0.14, 0.99, 0.24],
        [0.65, 0.05, 0.39, 0.49],
        [0.96, 0.09, 0.49, 0.30],
        [0.86, 0.03, 0.30, 0.39],
        [0.79, 0.09, 0.41, 0.69],
        [0.65, 0.24, 0.63, 0.27],
        [0.80, 0.40, 0.23, 0.33],
        [0.39, 0.38, 0.85, 0.32],
        [0.38, 0.07, 0.82, 0.01],
        [0.66, 0.09, 0.69, 0.46],
        [0.95, 0.62, 0.28, 0.01],
        [0.62, 0.24, 0.77, 0.17],
        [0.73, 0.65, 0.23, 0.02],
        [0.72, 0.55, 0.10, 0.17],
        [0.61, 0.42, 0.24, 0.33],
        [0.46, 0.35, 0.96, 0.05],
        [0.62, 0.01, 0.88, 0.10],
        [0.58, 0.37, 0.90, 0.08],
        [0.90, 0.05, 0.34, 0.41],
        [0.90, 0.41, 0.27, 0.36],
        [0.67, 0.32, 0.66, 0.20],
        [0.72, 0.14, 0.63, 0.37],
        [0.63, 0.05, 0.52, 0.63],
        [0.36, 0.34, 0.75, 0.37],
    ]

    # Seed first; the expected values were presumably recorded with this seed.
    random.seed(2)
    best = best_cluster('small_candy.txt', 3, 20, 0.25)

    cornelltest.assert_float_lists_equal(expected_centroid,
                                         best.getCentroid())
    cornelltest.assert_float_lists_equal(expected_contents,
                                         best.getContents())
    print('  best_cluster appears to be working correctly')
Пример #2
0
def testB():
    """Test Part B (of Part I) of the assignment.

    Covers the basic Cluster methods (getCentroid, getContents,
    appendContents, clearContents) and Database.getCluster from part A.

    To test getCluster the database must already hold clusters, so this
    procedure assigns the hidden attributes _clusters and _ksize directly.
    Normally this is bad programming (hidden attributes should only be used
    inside of the class definition), but testing is a good time to break
    that rule."""
    print('  Testing Part B')
    # TEST CASE 1
    # Create and test a cluster (always empty)
    point = [0.0, 1.0, 0.0]
    cluster1 = Cluster(point)

    # Compare centroid and contents
    cornelltest.assert_float_lists_equal(point, cluster1.getCentroid())
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    # Make sure centroid COPIED: the list handed back as the centroid must
    # not be the very object the cluster was created from.  The check has to
    # look at getCentroid(); comparing against getContents() says nothing
    # about the centroid.
    cornelltest.assert_not_equals(id(point), id(cluster1.getCentroid()))

    # Add something to cluster (and check it was added)
    extra = [0.0, 0.5, 4.2]
    cluster1.appendContents(extra)
    # Cluster contents is a 2D-list.
    cornelltest.assert_float_lists_equal([extra], cluster1.getContents())
    # Check the point was COPIED (no stored point is the object we passed in)
    cornelltest.assert_false(id(extra) in map(id, cluster1.getContents()))

    # And clear it
    cluster1.clearContents()
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    print('    Basic cluster methods look okay')

    # TEST CASE 2 (getCluster)
    # Make a second cluster
    cluster2 = Cluster([0.0, 0.0, 0.0])

    # Now make a database and put these in _clusters attribute
    dbase = Database(3)
    # THIS VIOLATES GOOD PROGRAMMING. But sometimes rules must be broken.
    dbase._clusters = [cluster1, cluster2]
    dbase._ksize = 2

    # Check that I get the right objects back
    # MUST COMPARE FOLDER IDENTIFIERS.  Use the id function.
    cornelltest.assert_equals(id(cluster1), id(dbase.getCluster(0)))
    cornelltest.assert_equals(id(cluster2), id(dbase.getCluster(1)))
    print('    Method getCluster() looks okay')
    print('  Part B appears correct')
Пример #3
0
def testB():
    """Test Part B (of Part I) of the assignment.

    Covers the basic Cluster methods (getCentroid, getContents,
    appendContents, clearContents) and Database.getCluster from part A.

    To test getCluster the database must already hold clusters, so this
    procedure assigns the hidden attributes _clusters and _ksize directly.
    Normally this is bad programming (hidden attributes should only be used
    inside of the class definition), but testing is a good time to break
    that rule."""
    print('  Testing Part B')
    # TEST CASE 1
    # Create and test a cluster (always empty)
    point = [0.0, 1.0, 0.0]
    cluster1 = Cluster(point)

    # Compare centroid and contents
    cornelltest.assert_float_lists_equal(point, cluster1.getCentroid())
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    # Make sure centroid COPIED: the list handed back as the centroid must
    # not be the very object the cluster was created from.  The check has to
    # look at getCentroid(); comparing against getContents() says nothing
    # about the centroid.
    cornelltest.assert_not_equals(id(point), id(cluster1.getCentroid()))

    # Add something to cluster (and check it was added)
    extra = [0.0, 0.5, 4.2]
    cluster1.appendContents(extra)
    # Cluster contents is a 2D-list.
    cornelltest.assert_float_lists_equal([extra], cluster1.getContents())
    # Check the point was COPIED (no stored point is the object we passed in)
    cornelltest.assert_false(id(extra) in map(id, cluster1.getContents()))

    # And clear it
    cluster1.clearContents()
    cornelltest.assert_float_lists_equal([], cluster1.getContents())
    print('    Basic cluster methods look okay')

    # TEST CASE 2 (getCluster)
    # Make a second cluster
    cluster2 = Cluster([0.0, 0.0, 0.0])

    # Now make a database and put these in _clusters attribute
    dbase = Database(3)
    # THIS VIOLATES GOOD PROGRAMMING. But sometimes rules must be broken.
    dbase._clusters = [cluster1, cluster2]
    dbase._ksize = 2

    # Check that I get the right objects back
    # MUST COMPARE FOLDER IDENTIFIERS.  Use the id function.
    cornelltest.assert_equals(id(cluster1), id(dbase.getCluster(0)))
    cornelltest.assert_equals(id(cluster2), id(dbase.getCluster(1)))
    print('    Method getCluster() looks okay')
    print('  Part B appears correct')
Пример #4
0
def test_best_cluster():
    """Check best_cluster against a stored answer for small_candy.txt.

    This is the single test case for the analysis portion of the
    assignment: seed the random number generator with 2, call best_cluster,
    and compare the returned cluster's centroid and contents with the
    expected values below."""
    print('  Testing best_cluster on small_candy.txt')

    # Seed before the call; the expected values were presumably recorded
    # with this seed.
    random.seed(2)
    found = best_cluster('small_candy.txt', 3, 20, 0.25)

    want_centroid = [
        0.6730769230769229,
        0.26384615384615384,
        0.5303846153846155,
        0.27538461538461534,
    ]
    want_contents = [
        [0.73, 0.31, 0.15, 0.08], [0.77, 0.45, 0.31, 0.31],
        [0.39, 0.14, 0.99, 0.24], [0.65, 0.05, 0.39, 0.49],
        [0.96, 0.09, 0.49, 0.30], [0.86, 0.03, 0.30, 0.39],
        [0.79, 0.09, 0.41, 0.69], [0.65, 0.24, 0.63, 0.27],
        [0.80, 0.40, 0.23, 0.33], [0.39, 0.38, 0.85, 0.32],
        [0.38, 0.07, 0.82, 0.01], [0.66, 0.09, 0.69, 0.46],
        [0.95, 0.62, 0.28, 0.01], [0.62, 0.24, 0.77, 0.17],
        [0.73, 0.65, 0.23, 0.02], [0.72, 0.55, 0.10, 0.17],
        [0.61, 0.42, 0.24, 0.33], [0.46, 0.35, 0.96, 0.05],
        [0.62, 0.01, 0.88, 0.10], [0.58, 0.37, 0.90, 0.08],
        [0.90, 0.05, 0.34, 0.41], [0.90, 0.41, 0.27, 0.36],
        [0.67, 0.32, 0.66, 0.20], [0.72, 0.14, 0.63, 0.37],
        [0.63, 0.05, 0.52, 0.63], [0.36, 0.34, 0.75, 0.37],
    ]

    cornelltest.assert_float_lists_equal(want_centroid, found.getCentroid())
    cornelltest.assert_float_lists_equal(want_contents, found.getContents())
    print('  best_cluster appears to be working correctly')
def test_cluster_b():
    """Exercise Cluster.distance and Cluster.updateCentroid (Part B).

    Two clusters are built over one four-point dataset.  distance() is
    checked against three known values; updateCentroid() is checked for both
    the new centroid and the stability flag it returns."""
    print('  Testing Part B of class Cluster')

    # Four 3-dimensional points shared by both clusters
    points = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0],
              [0.0, 0.0, 1.0]]
    dataset = a6.Dataset(3, points)

    first = a6.Cluster(dataset, [0.5, 0.5, 0.0])
    second = a6.Cluster(dataset, [0.0, 0.0, 0.5])

    # distance(), case 1: a point away from the centroid
    cornelltest.assert_floats_equal(1.22474487139,
                                    first.distance([1.0, 0.0, -1.0]))
    # distance(), case 2: the centroid itself
    cornelltest.assert_floats_equal(0.0, first.distance([0.5, 0.5, 0.0]))
    # distance(), case 3: measured from the other cluster
    cornelltest.assert_floats_equal(0.5, second.distance([0.5, 0.0, 0.5]))
    print('    Method Cluster.distance() looks okay')

    # updateCentroid(), case 1: points 0 and 1 average to the current centroid
    for index in (0, 1):
        first.addIndex(index)
    unchanged = first.updateCentroid()
    cornelltest.assert_float_lists_equal([0.5, 0.5, 0.0],
                                         first.getCentroid())
    cornelltest.assert_true(unchanged)

    # updateCentroid(), case 2: adding points 2 and 3 moves the centroid
    for index in (2, 3):
        first.addIndex(index)
    unchanged = first.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25],
                                         first.getCentroid())
    cornelltest.assert_false(unchanged)

    # A second update with the same points must report stability
    unchanged = first.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25],
                                         first.getCentroid())
    cornelltest.assert_true(unchanged)

    print('    Method Cluster.updateCentroid() looks okay')
    print('  Part B of class Cluster appears correct')
    print('')
Пример #6
0
def test_cluster_b():
    """Exercise Cluster.distance and Cluster.updateCentroid (Part B).

    Two clusters share one four-point dataset.  The distance checks are
    listed as a table of (cluster, point, expected distance); the
    updateCentroid checks verify both the resulting centroid and the
    returned stability flag."""
    print('  Testing Part B of class Cluster')

    # A dataset with four 3-dimensional points
    dataset = a6.Dataset(3, [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0],
                             [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]])

    # Two clusters over that dataset
    cluster_a = a6.Cluster(dataset, [0.5, 0.5, 0.0])
    cluster_b = a6.Cluster(dataset, [0.0, 0.0, 0.5])

    # TEST CASES 1-3 (distance), run in the listed order
    distance_cases = [
        (cluster_a, [1.0, 0.0, -1.0], 1.22474487139),
        (cluster_a, [0.5, 0.5, 0.0], 0.0),
        (cluster_b, [0.5, 0.0, 0.5], 0.5),
    ]
    for cluster, target, expected in distance_cases:
        cornelltest.assert_floats_equal(expected, cluster.distance(target))
    print('    Method Cluster.distance() looks okay')

    # TEST CASE 1 (updateCentroid): centroid remains the same
    cluster_a.addIndex(0)
    cluster_a.addIndex(1)
    is_stable = cluster_a.updateCentroid()
    cornelltest.assert_float_lists_equal([0.5, 0.5, 0.0],
                                         cluster_a.getCentroid())
    cornelltest.assert_true(is_stable)

    # TEST CASE 2 (updateCentroid): centroid changes
    cluster_a.addIndex(2)
    cluster_a.addIndex(3)
    is_stable = cluster_a.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25],
                                         cluster_a.getCentroid())
    cornelltest.assert_false(is_stable)

    # Updating again without changing the points: centroid is now stable
    is_stable = cluster_a.updateCentroid()
    cornelltest.assert_float_lists_equal([0.25, 0.25, 0.25],
                                         cluster_a.getCentroid())
    cornelltest.assert_true(is_stable)

    print('    Method Cluster.updateCentroid() looks okay')
    print('  Part B of class Cluster appears correct')
    print('')
Пример #7
0
def test_cluster_a():
    """Exercise the Part A methods of Cluster.

    Covers construction (getCentroid, getIndices), addIndex (including a
    duplicate index), getContents, and clear."""
    print('  Testing Part A of class Cluster')

    # TEST CASE 1
    # A new cluster over an empty dataset has a centroid but no indices
    dataset = a6.Dataset(3)
    seed_point = [0.0, 1.0, 0.0]
    cluster = a6.Cluster(dataset, seed_point)

    cornelltest.assert_float_lists_equal(seed_point, cluster.getCentroid())
    cornelltest.assert_equals([], cluster.getIndices())
    # The centroid handed back must not be the list we passed in
    cornelltest.assert_not_equals(id(seed_point), id(cluster.getCentroid()))

    print('    Basic cluster methods look okay')

    # Put two points in the dataset, then add their indices to the cluster
    new_points = [[0.0, 0.5, 4.2], [0.0, 1.0, 0.0]]
    for new_point in new_points:
        dataset.addPoint(new_point)

    cluster.addIndex(1)
    cornelltest.assert_equals([1], cluster.getIndices())
    cluster.addIndex(0)
    cornelltest.assert_equals([1, 0], cluster.getIndices())
    # Adding an index that is already present must not duplicate it
    cluster.addIndex(1)
    cornelltest.assert_equals([1, 0], cluster.getIndices())

    print('    Method Cluster.addIndex look okay')

    # getContents returns the points in index order (1 first, then 0)
    members = cluster.getContents()
    cornelltest.assert_equals(2, len(members))
    cornelltest.assert_float_lists_equal(new_points[1], members[0])
    cornelltest.assert_float_lists_equal(new_points[0], members[1])

    print('    Method Cluster.getContents look okay')

    # Clearing removes every index
    cluster.clear()
    cornelltest.assert_equals([], cluster.getIndices())

    print('    Method Cluster.clear look okay')
    print('  Part A of class Cluster appears correct')
    print('')
Пример #8
0
def test_dataset():
    """Exercise the Dataset class.

    Covers default and user-provided initialization, getPoint, and addPoint,
    including the checks that contents and points are copied rather than
    shared with the caller."""
    print('  Testing class Dataset')

    # TEST CASE 1
    # A dataset built from a dimension only starts out empty
    empty = a6.Dataset(3)
    cornelltest.assert_equals(3, empty.getDimension())
    cornelltest.assert_equals(0, empty.getSize())

    # This assert function compares (nested) lists of floats
    cornelltest.assert_float_lists_equal([], empty.getContents())

    print('    Default initialization looks okay')

    # TEST CASE 2
    # A dataset built from an initial list of points
    points = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0],
              [0.0, 0.0, 1.0]]
    filled = a6.Dataset(3, points)
    cornelltest.assert_equals(3, filled.getDimension())
    cornelltest.assert_equals(4, filled.getSize())

    # Contents equal the input, but neither the outer list nor the first
    # point is the same object as the one passed in
    cornelltest.assert_float_lists_equal(points, filled.getContents())
    cornelltest.assert_false(filled.getContents() is points)
    cornelltest.assert_false(filled.getContents()[0] is points[0])

    print('    User-provided initialization looks okay')

    # getPoint() returns the right point AND a copy of it
    cornelltest.assert_float_lists_equal([0.0, 1.0, 0.0], filled.getPoint(2))
    cornelltest.assert_false(filled.getContents()[2] is filled.getPoint(2))

    print('    Method Dataset.getPoint looks okay')

    # addPoint() on the empty dataset
    empty.addPoint([0.0, 0.5, 4.2])
    cornelltest.assert_float_lists_equal([[0.0, 0.5, 4.2]],
                                         empty.getContents())
    cornelltest.assert_float_lists_equal([0.0, 0.5, 4.2], empty.getPoint(0))
    # The point returned is a copy of the stored one
    cornelltest.assert_false(empty.getPoint(0) is empty.getContents()[0])

    # addPoint() on the non-empty dataset; mirror the change in points
    extra = [0.0, 0.5, 4.2]
    filled.addPoint(extra)
    points.append(extra)
    cornelltest.assert_float_lists_equal(points, filled.getContents())
    # The stored point must be a copy, not the object we passed in
    stored_ids = [id(stored) for stored in filled.getContents()]
    cornelltest.assert_false(id(extra) in stored_ids)

    print('    Method Dataset.addPoint looks okay')
    print('  class Dataset appears correct')
    print('')
def test_kmeans_c():
    """Test Part C of the ClusterGroup class.

    Three things are checked, in order:
      1. ClusterGroup._update() on a small 2-dimensional dataset, after an
         explicit call to _partition().
      2. Cluster.updateCentroid() and ClusterGroup.step() on a 3-dimensional
         dataset, using fixed seed indices instead of random ones.
      3. One call to step() on the clustering built by candy_to_kmeans from
         'datasets/smallcandy.csv', compared with stored expected centroids
         and contents.

    Each stage depends on the state left by the previous calls, so the
    statement order matters."""
    print '  Testing Part C of class ClusterGroup'
    items = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    dset = a6.Dataset(2, items)
    # Seed the two clusters from the points at indices 0 and 2
    km1 = a6.ClusterGroup(dset, 2, [0, 2])
    km1._partition()

    # Test _update(): the first call moves both centroids, so it reports
    # not stable
    stable = km1._update()
    cornelltest.assert_float_lists_equal([0, 4.5],
                                         km1.getClusters()[0].getCentroid())
    cornelltest.assert_float_lists_equal([10.0, 5.5],
                                         km1.getClusters()[1].getCentroid())
    cornelltest.assert_false(stable)

    # updating again should not change anything, but should return stable
    stable = km1._update()
    cornelltest.assert_float_lists_equal([0, 4.5],
                                         km1.getClusters()[0].getCentroid())
    cornelltest.assert_float_lists_equal([10.0, 5.5],
                                         km1.getClusters()[1].getCentroid())
    cornelltest.assert_true(stable)

    print '    Method ClusterGroup._update() looks okay'

    # Now test the k-means process itself.

    # FOR ALL TEST CASES
    # Create and initialize a non-empty dataset
    items = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
             [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dset = a6.Dataset(3, items)

    # Create a clustering, providing non-random seed indices so the test is
    # deterministic
    km2 = a6.ClusterGroup(dset, 2, [1, 3])

    # PRE-TEST: Check first cluster (should be okay if passed part D)
    cluster1 = km2.getClusters()[0]
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6],
                                         cluster1.getCentroid())
    cornelltest.assert_equals(set([]), set(cluster1.getIndices()))

    # PRE-TEST: Check second cluster (should be okay if passed part D)
    cluster2 = km2.getClusters()[1]
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5],
                                         cluster2.getCentroid())
    cornelltest.assert_equals(set([]), set(cluster2.getIndices()))

    # Make a stand-alone cluster (not part of km2) to test the
    # updateCentroid() method
    clustertest = a6.Cluster(dset, [0.5, 0.6, 0.6])
    for ind in [1, 2]:
        clustertest.addIndex(ind)

    # TEST CASE 1 (update)
    stable = clustertest.updateCentroid()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         clustertest.getCentroid())
    cornelltest.assert_false(stable)  # Not yet stable

    # TEST CASE 2 (update)
    stable = clustertest.updateCentroid()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         clustertest.getCentroid())
    cornelltest.assert_true(stable)  # Now it is stable

    # TEST CASE 3 (step)
    km2.step()

    # Check first cluster (WHICH HAS CHANGED!)
    cluster1 = km2.getClusters()[0]
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         cluster1.getCentroid())
    cornelltest.assert_equals(set([1, 2]), set(cluster1.getIndices()))

    # Check second cluster (WHICH HAS CHANGED!)
    cluster2 = km2.getClusters()[1]
    cornelltest.assert_float_lists_equal([0.5, 0.475, 0.475],
                                         cluster2.getCentroid())
    cornelltest.assert_equals(set([0, 3, 4, 5]), set(cluster2.getIndices()))

    # TEST CASE 4 (step): a second step moves point 3 to the first cluster
    km2.step()

    # Check first cluster (WHICH HAS CHANGED!)
    cluster1 = km2.getClusters()[0]
    cornelltest.assert_float_lists_equal([8. / 15, 17. / 30, 17. / 30],
                                         cluster1.getCentroid())
    cornelltest.assert_equals(set([1, 2, 3]), set(cluster1.getIndices()))

    # Check second cluster (WHICH HAS CHANGED!)
    cluster2 = km2.getClusters()[1]
    cornelltest.assert_float_lists_equal([0.5, 13. / 30, 14. / 30],
                                         cluster2.getCentroid())
    cornelltest.assert_equals(set([0, 4, 5]), set(cluster2.getIndices()))

    # Try it on a file
    # NOTE(review): candy_to_kmeans is defined outside this block; the
    # arguments look like (filename, k, seed indices) -- confirm.
    km3 = candy_to_kmeans('datasets/smallcandy.csv', 3, [23, 54, 36])
    km3.step()

    # The actual results
    cluster0 = km3.getClusters()[0]
    cluster1 = km3.getClusters()[1]
    cluster2 = km3.getClusters()[2]

    # The "correct" answers (expected contents and centroid of each cluster
    # after the single step above)
    contents0 = [[0.88, 0.84, 0.8, 0.3], [0.02, 0.67, 0.75, 0.61],
                 [0.2, 0.54, 0.73, 0.85], [0.62, 0.75, 0.65, 0.43],
                 [0.35, 0.63, 0.65, 0.12], [0.61, 0.85, 0.81, 0.44],
                 [0.95, 0.94, 0.98, 0.69], [0.04, 0.69, 0.38, 0.39],
                 [0.04, 0.52, 0.99, 0.75], [0.28, 0.91, 0.63, 0.08],
                 [0.14, 0.55, 0.67, 0.63], [0.38, 0.94, 0.53, 0.07],
                 [0.08, 0.62, 0.32, 0.27], [0.69, 0.82, 0.75, 0.65],
                 [0.84, 0.89, 0.91, 0.38], [0.22, 0.88, 0.39, 0.33],
                 [0.39, 0.38, 0.85, 0.32], [0.26, 0.39, 0.95, 0.63],
                 [0.15, 0.87, 0.62, 0.22], [0.65, 0.81, 0.69, 0.55],
                 [0.27, 0.63, 0.69, 0.39], [0.35, 0.7, 0.41, 0.15],
                 [0.2, 0.48, 0.98, 0.84], [0.76, 0.86, 0.74, 0.61],
                 [0.27, 0.65, 0.52, 0.28], [0.86, 0.91, 0.88, 0.62],
                 [0.1, 0.79, 0.5, 0.12], [0.09, 0.85, 0.55, 0.21],
                 [0.79, 0.94, 0.83, 0.48], [0.73, 0.92, 0.74, 0.39],
                 [0.31, 0.5, 0.87, 0.85], [0.39, 0.9, 0.52, 0.26],
                 [0.46, 0.35, 0.96, 0.05], [0.21, 0.62, 0.33, 0.09],
                 [0.58, 0.37, 0.9, 0.08], [0.54, 0.92, 0.36, 0.35],
                 [0.36, 0.64, 0.57, 0.26], [0.09, 0.47, 0.63, 0.8],
                 [0.4, 0.69, 0.74, 0.7]]
    contents1 = [[0.32, 0.87, 0.14, 0.68], [0.87, 0.99, 0.2, 0.8],
                 [0.86, 0.86, 0.32, 0.88], [0.81, 0.66, 0.26, 0.82],
                 [0.91, 0.98, 0.61, 0.58], [0.84, 0.88, 0.04, 0.86],
                 [0.8, 0.62, 0.09, 0.65], [0.72, 0.88, 0.02, 0.95],
                 [0.88, 0.96, 0.09, 0.88]]
    contents2 = [[0.4, 0.21, 0.78, 0.68], [0.54, 0.06, 0.81, 0.98],
                 [0.73, 0.31, 0.15, 0.08], [0.81, 0.69, 0.65, 0.65],
                 [0.14, 0.31, 0.86, 0.74], [0.77, 0.45, 0.31, 0.31],
                 [0.39, 0.14, 0.99, 0.24], [0.23, 0.32, 0.7, 0.75],
                 [0.65, 0.05, 0.39, 0.49], [0.96, 0.09, 0.49, 0.3],
                 [0.86, 0.03, 0.3, 0.39], [0.5, 0.2, 0.69, 0.95],
                 [0.79, 0.09, 0.41, 0.69], [0.4, 0.3, 0.78, 0.74],
                 [0.65, 0.24, 0.63, 0.27], [0.35, 0.3, 0.94, 0.92],
                 [0.71, 0.78, 0.64, 0.57], [0.8, 0.4, 0.23, 0.33],
                 [0.38, 0.07, 0.82, 0.01], [0.66, 0.09, 0.69, 0.46],
                 [0.54, 0.06, 0.74, 0.86], [0.95, 0.62, 0.28, 0.01],
                 [0.35, 0.71, 0.01, 0.32], [0.62, 0.24, 0.77, 0.17],
                 [0.73, 0.65, 0.23, 0.02], [0.27, 0.38, 0.76, 0.63],
                 [0.9, 0.63, 0.83, 0.6], [0.7, 0.04, 0.7, 0.82],
                 [0.95, 0.83, 0.64, 0.5], [0.41, 0.11, 0.61, 0.78],
                 [0.22, 0.44, 0.67, 0.99], [0.51, 0.05, 0.95, 0.66],
                 [0.99, 0.68, 0.8, 0.42], [0.72, 0.55, 0.1, 0.17],
                 [0.44, 0.1, 0.61, 0.98], [0.31, 0.16, 0.95, 0.9],
                 [0.61, 0.42, 0.24, 0.33], [0.89, 0.72, 0.78, 0.38],
                 [0.5, 0.09, 0.84, 0.78], [0.62, 0.01, 0.88, 0.1],
                 [0.44, 0.28, 0.88, 0.99], [0.57, 0.23, 0.6, 0.85],
                 [0.9, 0.05, 0.34, 0.41], [0.9, 0.41, 0.27, 0.36],
                 [0.67, 0.32, 0.66, 0.2], [0.72, 0.14, 0.63, 0.37],
                 [0.39, 0.08, 0.77, 0.96], [0.9, 0.7, 0.74, 0.63],
                 [0.63, 0.05, 0.52, 0.63], [0.62, 0.27, 0.67, 0.77],
                 [0.35, 0.04, 0.85, 0.86], [0.36, 0.34, 0.75, 0.37]]
    centroid0 = [
        0.3987179487179487, 0.7097435897435899, 0.6864102564102561,
        0.4164102564102565
    ]
    centroid1 = [
        0.7788888888888889, 0.8555555555555555, 0.19666666666666668,
        0.788888888888889
    ]
    centroid2 = [
        0.6038461538461538, 0.29865384615384616, 0.6217307692307692,
        0.5455769230769231
    ]

    cornelltest.assert_float_lists_equal(centroid0, cluster0.getCentroid())
    cornelltest.assert_float_lists_equal(centroid1, cluster1.getCentroid())
    cornelltest.assert_float_lists_equal(centroid2, cluster2.getCentroid())
    cornelltest.assert_float_lists_equal(contents0, cluster0.getContents())
    cornelltest.assert_float_lists_equal(contents1, cluster1.getContents())
    cornelltest.assert_float_lists_equal(contents2, cluster2.getContents())

    print '    Method ClusterGroup.step looks okay'
    print '  Part C of class ClusterGroup appears correct'
    print ''
Пример #10
0
def testE():
    """Test Part E (of Part I) of the assignment.

    Covers the last pieces of k-means: Cluster.update() and
    Database.step().  As in the test for Part D, random.seed is called
    first so that the clusters chosen by setKSize are repeatable."""
    print('  Testing Part E')
    # Fix the random sequence (seed 3 gives a more interesting result
    # than seed 1)
    random.seed(3)

    # Shared fixture: a non-empty database split into two clusters
    points = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
              [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    database = Database(3, points)
    database.setKSize(2)

    # PRE-TEST: first cluster (should already hold if Part D passed)
    first = database.getCluster(0)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6],
                                         first.getCentroid())
    cornelltest.assert_float_lists_equal(points[1:3], first.getContents())

    # PRE-TEST: second cluster (should already hold if Part D passed)
    second = database.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5],
                                         second.getCentroid())
    cornelltest.assert_float_lists_equal([points[0]] + points[3:],
                                         second.getContents())

    # Build a separate copy of the first cluster so update() can be tested
    # without disturbing the database
    scratch = Cluster(first.getCentroid())
    for member in first.getContents():
        scratch.appendContents(member)

    # TEST CASE 1 (update): the centroid moves, so not yet stable
    settled = scratch.update()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         scratch.getCentroid())
    cornelltest.assert_false(settled)

    # TEST CASE 2 (update): same contents again, so now stable
    settled = scratch.update()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         scratch.getCentroid())
    cornelltest.assert_true(settled)
    print('    Method update() looks okay')

    # TEST CASE 3 (step)
    database.step()

    # The number of clusters must not change
    cornelltest.assert_equals(2, database.getKSize())

    # First cluster after the step (centroid and contents both changed)
    first = database.getCluster(0)
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         first.getCentroid())
    cornelltest.assert_float_lists_equal(points[1:4], first.getContents())

    # Second cluster after the step (centroid and contents both changed)
    second = database.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.475, 0.475],
                                         second.getCentroid())
    cornelltest.assert_float_lists_equal([points[0]] + points[4:],
                                         second.getContents())
    print('    Method step() looks okay')
    print('  Part E appears correct')
Пример #11
0
def testD():
    """Test Part D (of Part I) of the assignment.

    The method setKSize() has a randomization element inside of it, which
    makes it hard to check: a truly random choice gives a different answer
    on every run.  That is why these unit tests are provided rather than
    left for you to write.

    To get around the problem, this procedure calls random.seed() first.
    Seeding makes the pseudorandom number generator return a predictable
    sequence of values.  For more information, see
    http://en.wikipedia.org/wiki/Pseudorandomness."""
    print('  Testing Part D (setKSize)')
    # Make the "random" choices repeatable
    random.seed(1)

    # Shared fixture for every test case: a non-empty database
    points = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0],
              [0.0, 0.0, 1.0]]
    database = Database(3, points)

    # TEST CASE 1 (k = 0): no clusters at all
    database.setKSize(0)
    cornelltest.assert_equals(0, database.getKSize())
    cornelltest.assert_float_lists_equal([], database._clusters)

    # TEST CASE 2 (k = 2): two clusters
    database.setKSize(2)
    cornelltest.assert_equals(2, database.getKSize())

    # First of two clusters
    first = database.getCluster(0)
    cornelltest.assert_float_lists_equal([1.0, 0.0, 0.0],
                                         first.getCentroid())
    cornelltest.assert_float_lists_equal(points[:3], first.getContents())

    # Second of two clusters
    second = database.getCluster(1)
    cornelltest.assert_float_lists_equal([0.0, 0.0, 1.0],
                                         second.getCentroid())
    cornelltest.assert_float_lists_equal(points[3:], second.getContents())

    # TEST CASE 3 (k = 3): three clusters
    database.setKSize(3)
    cornelltest.assert_equals(3, database.getKSize())

    # First of three clusters
    first = database.getCluster(0)
    cornelltest.assert_float_lists_equal([0.0, 0.0, 1.0],
                                         first.getCentroid())
    cornelltest.assert_float_lists_equal([points[3]], first.getContents())

    # Second of three clusters
    second = database.getCluster(1)
    cornelltest.assert_float_lists_equal([0.0, 1.0, 0.0],
                                         second.getCentroid())
    cornelltest.assert_float_lists_equal([points[1]], second.getContents())

    # Third of three clusters
    third = database.getCluster(2)
    cornelltest.assert_float_lists_equal([0.0, 0.0, 0.0],
                                         third.getCentroid())
    cornelltest.assert_float_lists_equal([points[0], points[2]],
                                         third.getContents())
    print('  Part D appears correct')
Пример #12
0
def testA():
    """Test Part A (of Part I) of the assignment.

    Covers Database construction (with and without initial contents),
    getDimension, getKSize, getContents, appendContents and clearContents.
    getCluster is not tested here because no clusters exist yet; that test
    lives in Part B."""
    print('  Testing Part A')

    # TEST CASE 1
    # A database created from a dimension only
    blank = Database(3)
    cornelltest.assert_equals(3, blank.getDimension())
    cornelltest.assert_equals(0, blank.getKSize())

    # assert_float_lists_equal compares (nested) lists of floats
    cornelltest.assert_float_lists_equal([], blank.getContents())

    # Append one point; the contents are a 2D-list holding that point
    blank.appendContents([0.0, 0.5, 4.2])
    cornelltest.assert_float_lists_equal([[0.0, 0.5, 4.2]],
                                         blank.getContents())

    # Clearing empties the contents again
    blank.clearContents()
    cornelltest.assert_float_lists_equal([], blank.getContents())
    print('    Default initialization looks okay')

    # TEST CASE 2
    # A database created with initial contents
    points = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0],
              [0.0, 0.0, 1.0]]
    loaded = Database(3, points)
    cornelltest.assert_equals(3, loaded.getDimension())
    cornelltest.assert_equals(0, loaded.getKSize())

    # Contents equal the input but are not the very list passed in
    cornelltest.assert_float_lists_equal(points, loaded.getContents())
    cornelltest.assert_not_equals(id(points), id(loaded.getContents()))

    # Append a point; mirror the change in the local list for comparison
    extra = [0.0, 0.5, 4.2]
    loaded.appendContents(extra)
    points.append(extra)
    cornelltest.assert_float_lists_equal(points, loaded.getContents())
    # The stored point must be a copy, not the object we passed in
    stored_ids = [id(stored) for stored in loaded.getContents()]
    cornelltest.assert_false(id(extra) in stored_ids)

    # Clearing empties the contents
    loaded.clearContents()
    cornelltest.assert_float_lists_equal([], loaded.getContents())
    print('    User-given contents looks okay')
    print('  Part A appears correct')
Пример #13
0
def testC():
    """Check the Part C (of Part I) methods: distance, nearest, partition.

    Clusters are normally created in Part D, so this procedure installs two
    clusters itself by writing to the database's hidden attributes
    (_clusters and _ksize).

    Touching hidden attributes from outside the class definition is
    normally bad programming; it is tolerated here only because this is
    test code."""
    print('  Testing Part C')

    # Fixture shared by every case: a database holding four points
    points = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0],
              [0.0, 0.0, 1.0]]
    db = Database(3, points)

    first = Cluster([0.5, 0.5, 0.0])
    second = Cluster([0.0, 0.0, 0.5])

    # Install the clusters through the hidden attributes.
    # THIS VIOLATES GOOD PROGRAMMING, but it is the only way to set them here.
    db._clusters = [first, second]
    db._ksize = 2

    # Cases 1-2 (distance): an off-centroid point, then the centroid itself
    cornelltest.assert_floats_equal(1.22474487139,
                                    first.distance([1.0, 0.0, -1.0]))
    cornelltest.assert_floats_equal(0.0, first.distance([0.5, 0.5, 0.0]))
    print('    Method distance() looks okay')

    # Cases 3-4 (nearest): the result must be the expected cluster object
    closest = db.nearest([1.0, 0.0, 0.0])
    cornelltest.assert_equals(id(first), id(closest))
    closest = db.nearest([0.0, 0.0, 1.0])
    cornelltest.assert_equals(id(second), id(closest))
    print('    Method nearest() looks okay')

    # Case 5 (partition): first two points -> first cluster, rest -> second
    db.partition()
    cornelltest.assert_float_lists_equal(points[:2], first.getContents())
    cornelltest.assert_float_lists_equal(points[2:], second.getContents())

    # Case 6 (partition): exchange the centroids; the memberships must swap
    db._clusters[0].setCentroid([0.0, 0.0, 0.5])
    db._clusters[1].setCentroid([0.5, 0.5, 0.0])
    db.partition()
    cornelltest.assert_float_lists_equal(points[2:], first.getContents())
    cornelltest.assert_float_lists_equal(points[:2], second.getContents())

    print('    Method partition() looks okay')
    print('  Part C appears correct')
Пример #14
0
def test_kmeans_d():
    """Check Part D of the ClusterGroup class (the run method).

    run() is exercised twice: on a six-point dataset with hand-picked seed
    indices, and on the small candy dataset.  The resulting centroids and
    cluster membership are compared against stored expected values."""
    print('  Testing Part D of class ClusterGroup')
    points = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
              [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dataset = a6.Dataset(3, points)

    # Two clusters on the six-point dataset, driven end-to-end through run()
    group = a6.ClusterGroup(dataset, 2, [1, 3])
    group.run(10)

    # First cluster of the small case
    first = group.getClusters()[0]
    cornelltest.assert_float_lists_equal([8./15, 17./30, 17./30],
                                         first.getCentroid())
    cornelltest.assert_equals(set([1, 2, 3]), set(first.getIndices()))

    # Second cluster of the small case
    second = group.getClusters()[1]
    cornelltest.assert_float_lists_equal([0.5, 13./30, 14./30],
                                         second.getCentroid())
    cornelltest.assert_equals(set([0, 4, 5]), set(second.getIndices()))
    print('    Method run looks okay')

    # Real-world data: three clusters over the candy dataset
    candy = candy_to_kmeans('datasets/small_candy.csv', 3, [23, 54, 36])
    candy.run(20)

    # Actual results (getClusters() is queried once per cluster)
    found = [candy.getClusters()[pos] for pos in range(3)]

    # Expected membership of each cluster
    members0 = [
        [0.88, 0.84, 0.8, 0.3], [0.02, 0.67, 0.75, 0.61], [0.81, 0.69, 0.65, 0.65],
        [0.62, 0.75, 0.65, 0.43], [0.35, 0.63, 0.65, 0.12], [0.61, 0.85, 0.81, 0.44],
        [0.95, 0.94, 0.98, 0.69], [0.04, 0.69, 0.38, 0.39], [0.28, 0.91, 0.63, 0.08],
        [0.38, 0.94, 0.53, 0.07], [0.08, 0.62, 0.32, 0.27], [0.69, 0.82, 0.75, 0.65],
        [0.84, 0.89, 0.91, 0.38], [0.22, 0.88, 0.39, 0.33], [0.71, 0.78, 0.64, 0.57],
        [0.15, 0.87, 0.62, 0.22], [0.65, 0.81, 0.69, 0.55], [0.27, 0.63, 0.69, 0.39],
        [0.35, 0.7, 0.41, 0.15], [0.91, 0.98, 0.61, 0.58], [0.9, 0.63, 0.83, 0.6],
        [0.95, 0.83, 0.64, 0.5], [0.76, 0.86, 0.74, 0.61], [0.27, 0.65, 0.52, 0.28],
        [0.86, 0.91, 0.88, 0.62], [0.1, 0.79, 0.5, 0.12], [0.99, 0.68, 0.8, 0.42],
        [0.09, 0.85, 0.55, 0.21], [0.79, 0.94, 0.83, 0.48], [0.73, 0.92, 0.74, 0.39],
        [0.89, 0.72, 0.78, 0.38], [0.39, 0.9, 0.52, 0.26], [0.46, 0.35, 0.96, 0.05],
        [0.21, 0.62, 0.33, 0.09], [0.58, 0.37, 0.9, 0.08], [0.54, 0.92, 0.36, 0.35],
        [0.67, 0.32, 0.66, 0.2], [0.36, 0.64, 0.57, 0.26], [0.9, 0.7, 0.74, 0.63],
        [0.4, 0.69, 0.74, 0.7]]
    members1 = [
        [0.32, 0.87, 0.14, 0.68], [0.73, 0.31, 0.15, 0.08], [0.87, 0.99, 0.2, 0.8],
        [0.77, 0.45, 0.31, 0.31], [0.96, 0.09, 0.49, 0.3], [0.86, 0.03, 0.3, 0.39],
        [0.86, 0.86, 0.32, 0.88], [0.8, 0.4, 0.23, 0.33], [0.81, 0.66, 0.26, 0.82],
        [0.95, 0.62, 0.28, 0.01], [0.35, 0.71, 0.01, 0.32], [0.73, 0.65, 0.23, 0.02],
        [0.84, 0.88, 0.04, 0.86], [0.8, 0.62, 0.09, 0.65], [0.72, 0.55, 0.1, 0.17],
        [0.61, 0.42, 0.24, 0.33], [0.72, 0.88, 0.02, 0.95], [0.88, 0.96, 0.09, 0.88],
        [0.9, 0.05, 0.34, 0.41], [0.9, 0.41, 0.27, 0.36]]
    members2 = [
        [0.4, 0.21, 0.78, 0.68], [0.54, 0.06, 0.81, 0.98], [0.2, 0.54, 0.73, 0.85],
        [0.14, 0.31, 0.86, 0.74], [0.39, 0.14, 0.99, 0.24], [0.23, 0.32, 0.7, 0.75],
        [0.65, 0.05, 0.39, 0.49], [0.04, 0.52, 0.99, 0.75], [0.14, 0.55, 0.67, 0.63],
        [0.5, 0.2, 0.69, 0.95], [0.79, 0.09, 0.41, 0.69], [0.4, 0.3, 0.78, 0.74],
        [0.65, 0.24, 0.63, 0.27], [0.35, 0.3, 0.94, 0.92], [0.39, 0.38, 0.85, 0.32],
        [0.38, 0.07, 0.82, 0.01], [0.66, 0.09, 0.69, 0.46], [0.26, 0.39, 0.95, 0.63],
        [0.54, 0.06, 0.74, 0.86], [0.2, 0.48, 0.98, 0.84], [0.62, 0.24, 0.77, 0.17],
        [0.27, 0.38, 0.76, 0.63], [0.7, 0.04, 0.7, 0.82], [0.41, 0.11, 0.61, 0.78],
        [0.22, 0.44, 0.67, 0.99], [0.51, 0.05, 0.95, 0.66], [0.44, 0.1, 0.61, 0.98],
        [0.31, 0.16, 0.95, 0.9], [0.31, 0.5, 0.87, 0.85], [0.5, 0.09, 0.84, 0.78],
        [0.62, 0.01, 0.88, 0.1], [0.44, 0.28, 0.88, 0.99], [0.57, 0.23, 0.6, 0.85],
        [0.72, 0.14, 0.63, 0.37], [0.39, 0.08, 0.77, 0.96], [0.09, 0.47, 0.63, 0.8],
        [0.63, 0.05, 0.52, 0.63], [0.62, 0.27, 0.67, 0.77], [0.35, 0.04, 0.85, 0.86],
        [0.36, 0.34, 0.75, 0.37]]
    expected_members = [members0, members1, members2]

    # Expected centroid of each cluster
    expected_centroids = [
        [0.54125, 0.7545, 0.66125, 0.3775],
        [0.76900, 0.5705, 0.20550, 0.4775],
        [0.42325, 0.2330, 0.75775, 0.6765],
    ]

    # All centroids are compared first, then all contents
    for wanted, cluster in zip(expected_centroids, found):
        cornelltest.assert_float_lists_equal(wanted, cluster.getCentroid())
    for wanted, cluster in zip(expected_members, found):
        cornelltest.assert_float_lists_equal(wanted, cluster.getContents())
    print('    Candy analysis test looks okay')
    print('  Part D of class ClusterGroup appears correct')
    print('')
Пример #15
0
def test_kmeans_c():
    """Check Part C of the ClusterGroup class.

    Three things are exercised in turn: ClusterGroup._update(),
    Cluster.updateCentroid() and ClusterGroup.step().  Seed indices are
    passed explicitly so the expected centroids and memberships are fixed."""
    print('  Testing Part C of class ClusterGroup')
    corners = [[0., 0.], [10., 1.], [10., 10.], [0., 9.]]
    corner_set = a6.Dataset(2, corners)
    group = a6.ClusterGroup(corner_set, 2, [0, 2])
    group._partition()

    # First _update(): the centroids move, so the result is "not stable"
    settled = group._update()
    cornelltest.assert_float_lists_equal(
        [0, 4.5], group.getClusters()[0].getCentroid())
    cornelltest.assert_float_lists_equal(
        [10.0, 5.5], group.getClusters()[1].getCentroid())
    cornelltest.assert_false(settled)

    # Second _update(): nothing moves any more, so it must report stable
    settled = group._update()
    cornelltest.assert_float_lists_equal(
        [0, 4.5], group.getClusters()[0].getCentroid())
    cornelltest.assert_float_lists_equal(
        [10.0, 5.5], group.getClusters()[1].getCentroid())
    cornelltest.assert_true(settled)

    print('    Method ClusterGroup._update() looks okay')

    # ---- The k-means process itself ----

    # Fixture for all remaining cases: a non-empty six-point dataset
    points = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
              [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    dataset = a6.Dataset(3, points)

    # Non-random seed indices keep the test deterministic
    stepper = a6.ClusterGroup(dataset, 2, [1, 3])

    # PRE-TEST: first cluster sits on its seed point and has no indices yet
    head = stepper.getClusters()[0]
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6], head.getCentroid())
    cornelltest.assert_equals(set([]), set(head.getIndices()))

    # PRE-TEST: same for the second cluster
    tail = stepper.getClusters()[1]
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5], tail.getCentroid())
    cornelltest.assert_equals(set([]), set(tail.getIndices()))

    # Stand-alone cluster used to exercise updateCentroid()
    probe = a6.Cluster(dataset, [0.5, 0.6, 0.6])
    for index in (1, 2):
        probe.addIndex(index)

    # TEST CASE 1 (updateCentroid): the centroid moves, so not yet stable
    settled = probe.updateCentroid()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         probe.getCentroid())
    cornelltest.assert_false(settled)

    # TEST CASE 2 (updateCentroid): same centroid again, so now stable
    settled = probe.updateCentroid()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         probe.getCentroid())
    cornelltest.assert_true(settled)

    # TEST CASE 3 (step): first step
    stepper.step()

    # Both clusters must have changed
    head = stepper.getClusters()[0]
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         head.getCentroid())
    cornelltest.assert_equals(set([1, 2]), set(head.getIndices()))

    tail = stepper.getClusters()[1]
    cornelltest.assert_float_lists_equal([0.5, 0.475, 0.475],
                                         tail.getCentroid())
    cornelltest.assert_equals(set([0, 3, 4, 5]), set(tail.getIndices()))

    # TEST CASE 4 (step): second step
    stepper.step()

    # Both clusters must have changed once more
    head = stepper.getClusters()[0]
    cornelltest.assert_float_lists_equal([8./15, 17./30, 17./30],
                                         head.getCentroid())
    cornelltest.assert_equals(set([1, 2, 3]), set(head.getIndices()))

    tail = stepper.getClusters()[1]
    cornelltest.assert_float_lists_equal([0.5, 13./30, 14./30],
                                         tail.getCentroid())
    cornelltest.assert_equals(set([0, 4, 5]), set(tail.getIndices()))
    print('    Method ClusterGroup.step looks okay')
    print('  Part C of class ClusterGroup appears correct')
    print('')
Пример #16
0
def testA():
    """Run the Part A (of Part I) checks against Database.

    Two databases are examined: one created empty and one created from a
    list of points.  For each, the dimension, k-size and contents are
    verified, followed by appendContents and clearContents.  getCluster is
    not tested here because no clusters exist yet; see Part B for that."""
    print('  Testing Part A')

    # TEST CASE 1: an empty database
    database = Database(3)
    cornelltest.assert_equals(3, database.getDimension())
    cornelltest.assert_equals(0, database.getKSize())

    # List comparison goes through assert_float_lists_equal
    cornelltest.assert_float_lists_equal([], database.getContents())

    # One appended point -> contents is a 2D list with exactly that point
    database.appendContents([0.0, 0.5, 4.2])
    cornelltest.assert_float_lists_equal(
        [[0.0, 0.5, 4.2]], database.getContents())

    # ... and clearing empties it again
    database.clearContents()
    cornelltest.assert_float_lists_equal([], database.getContents())
    print('    Default initialization looks okay')

    # TEST CASE 2: a database initialised with points
    seed_points = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0],
                   [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
    database = Database(3, seed_points)
    cornelltest.assert_equals(3, database.getDimension())
    cornelltest.assert_equals(0, database.getKSize())

    # Contents must match seed_points yet be a COPY (different identity)
    cornelltest.assert_float_lists_equal(seed_points, database.getContents())
    cornelltest.assert_not_equals(id(seed_points),
                                  id(database.getContents()))

    # Append a further point, tracking it in seed_points for the comparison
    addition = [0.0, 0.5, 4.2]
    database.appendContents(addition)
    seed_points.append(addition)
    cornelltest.assert_float_lists_equal(seed_points, database.getContents())

    # The appended point must have been COPIED into the database
    identities = [id(entry) for entry in database.getContents()]
    cornelltest.assert_false(id(addition) in identities)

    # ... and clearing empties this database too
    database.clearContents()
    cornelltest.assert_float_lists_equal([], database.getContents())
    print('    User-given contents looks okay')
    print('  Part A appears correct')
Пример #17
0
def testC():
    """Run the Part C (of Part I) checks: distance, nearest and partition.

    These checks need clusters, which are only created in Part D.  The
    procedure therefore sets the database's hidden attributes _clusters and
    _ksize directly.

    Using hidden attributes outside the class definition is normally bad
    programming, but testing is a reasonable place to break that rule."""
    print('  Testing Part C')

    # Used by every case below: a non-empty database of four points
    data = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
    database = Database(3, data)

    # The two clusters under test
    cluster_a = Cluster([0.5, 0.5, 0.0])
    cluster_b = Cluster([0.0, 0.0, 0.5])

    # Hook the clusters into the database via its hidden attributes.
    # THIS VIOLATES GOOD PROGRAMMING. But sometimes rules must be broken.
    database._clusters = [cluster_a, cluster_b]
    database._ksize = 2

    # TEST CASE 1 (distance): a point away from the centroid
    measured = cluster_a.distance([1.0, 0.0, -1.0])
    cornelltest.assert_floats_equal(1.22474487139, measured)

    # TEST CASE 2 (distance): the centroid itself
    measured = cluster_a.distance([0.5, 0.5, 0.0])
    cornelltest.assert_floats_equal(0.0, measured)
    print('    Method distance() looks okay')

    # TEST CASE 3 (nearest): expect the identical object cluster_a
    winner = database.nearest([1.0, 0.0, 0.0])
    cornelltest.assert_equals(id(cluster_a), id(winner))

    # TEST CASE 4 (nearest): expect the identical object cluster_b
    winner = database.nearest([0.0, 0.0, 1.0])
    cornelltest.assert_equals(id(cluster_b), id(winner))
    print('    Method nearest() looks okay')

    # TEST CASE 5 (partition): first half -> cluster_a, second half -> cluster_b
    database.partition()
    cornelltest.assert_float_lists_equal(data[:2], cluster_a.getContents())
    cornelltest.assert_float_lists_equal(data[2:], cluster_b.getContents())

    # TEST CASE 6 (partition): swap the centroids, so the halves swap as well
    database._clusters[0].setCentroid([0.0, 0.0, 0.5])
    database._clusters[1].setCentroid([0.5, 0.5, 0.0])
    database.partition()
    cornelltest.assert_float_lists_equal(data[2:], cluster_a.getContents())
    cornelltest.assert_float_lists_equal(data[:2], cluster_b.getContents())

    print('    Method partition() looks okay')
    print('  Part C appears correct')
Пример #18
0
def testD():
    """Check Part D (of Part I) of the assignment: Database.setKSize.

    This procedure shows why the unit tests are provided rather than left
    as an exercise.  setKSize() has a randomization element inside of it,
    and random behaviour is hard to check because the answer differs from
    run to run.

    To get around that, random.seed() is called first.  Seeding with a
    fixed value makes the pseudo-random generator produce the same,
    predictable sequence on every run, so the expected clusters can be
    written down below.  For more information, see
    http://en.wikipedia.org/wiki/Pseudorandomness."""
    print('  Testing Part D (setKSize)')
    # Pin the random number generator so the results are repeatable
    random.seed(1)

    # Fixture for every case: a non-empty database of four points
    points = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0],
              [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
    db = Database(3, points)

    def check_cluster(position, centroid, members):
        # Compare the cluster at `position` with its expected centroid
        # and contents (in that order).
        cluster = db.getCluster(position)
        cornelltest.assert_float_lists_equal(centroid, cluster.getCentroid())
        cornelltest.assert_float_lists_equal(members, cluster.getContents())

    # TEST CASE 1: k = 0 leaves no clusters at all
    db.setKSize(0)
    cornelltest.assert_equals(0, db.getKSize())
    cornelltest.assert_float_lists_equal([], db._clusters)

    # TEST CASE 2: k = 2 creates two clusters
    db.setKSize(2)
    cornelltest.assert_equals(2, db.getKSize())
    check_cluster(0, [1.0, 0.0, 0.0], points[:3])
    check_cluster(1, [0.0, 0.0, 1.0], points[3:])

    # TEST CASE 3: k = 3 creates three clusters
    db.setKSize(3)
    cornelltest.assert_equals(3, db.getKSize())
    check_cluster(0, [0.0, 0.0, 1.0], points[3:4])
    check_cluster(1, [0.0, 1.0, 0.0], points[1:2])
    check_cluster(2, [0.0, 0.0, 0.0], points[0:1] + points[2:3])
    print('  Part D appears correct')
Пример #19
0
def testE():
    """Check Part E (of Part I): Cluster.update and Database.step.

    This covers the final part of K-means.  As in the Part D test,
    random.seed() is used to pin the random number generator so that the
    clusters produced by setKSize() are repeatable."""
    print('  Testing Part E')
    # A seed of 3 gives a more interesting result than a seed of 1
    random.seed(3)

    # Fixture for every case: a six-point database split into two clusters
    points = [[0.5, 0.5, 0.5], [0.5, 0.6, 0.6], [0.6, 0.5, 0.6],
              [0.5, 0.6, 0.5], [0.5, 0.4, 0.5], [0.5, 0.4, 0.4]]
    db = Database(3, points)
    db.setKSize(2)

    # PRE-TEST: first cluster (should be okay if Part D passed)
    head = db.getCluster(0)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.6], head.getCentroid())
    cornelltest.assert_float_lists_equal(points[1:3], head.getContents())

    # PRE-TEST: second cluster (should be okay if Part D passed)
    tail = db.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.6, 0.5], tail.getCentroid())
    cornelltest.assert_float_lists_equal([points[0]] + points[3:],
                                         tail.getContents())

    # Duplicate the first cluster so update() can be tested in isolation
    probe = Cluster(head.getCentroid())
    for member in head.getContents():
        probe.appendContents(member)

    # TEST CASE 1 (update): the centroid moves, so not yet stable
    settled = probe.update()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         probe.getCentroid())
    cornelltest.assert_false(settled)

    # TEST CASE 2 (update): same centroid again, so now it is stable
    settled = probe.update()
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         probe.getCentroid())
    cornelltest.assert_true(settled)
    print('    Method update() looks okay')

    # TEST CASE 3 (step)
    db.step()

    # step() must not alter the k size
    cornelltest.assert_equals(2, db.getKSize())

    # First cluster (WHICH HAS CHANGED!)
    head = db.getCluster(0)
    cornelltest.assert_float_lists_equal([0.55, 0.55, 0.6],
                                         head.getCentroid())
    cornelltest.assert_float_lists_equal(points[1:4], head.getContents())

    # Second cluster (WHICH HAS CHANGED!)
    tail = db.getCluster(1)
    cornelltest.assert_float_lists_equal([0.5, 0.475, 0.475],
                                         tail.getCentroid())
    cornelltest.assert_float_lists_equal([points[0]] + points[4:],
                                         tail.getContents())
    print('    Method step() looks okay')
    print('  Part E appears correct')