Example 1
class SeperateChainHashTable_with_BSTHashBucket_Test_With_1_Elements(
        unittest.TestCase):
    def setUp(self):
        self._chained_hash_table = SeperateChainHashTable(
            bucket_type_class=BSTHashBucket)
        self._key = 'some_key'
        self._value = 'some_value'
        self._chained_hash_table[self._key] = self._value

    def test_get_and_len_on_single_entry_hash_table(self):
        self.assertEqual(self._chained_hash_table.get(key=self._key),
                         self._value, 'get on existing key failed')
        self.assertEqual(self._chained_hash_table[self._key], self._value,
                         'get on existing key failed')

        self.assertEqual(len(self._chained_hash_table), 1,
                         'single entry hash table length must be 1')

    def test_del_single_entry_hash_table(self):
        del self._chained_hash_table[self._key]
        #deleting the now-missing key again must raise KeyError
        with self.assertRaises(KeyError):
            del self._chained_hash_table[self._key]

    def tearDown(self):
        self._chained_hash_table = None
Example 2
class SeperateChainHashTable_with_BSTHashBucket_Test_Capacity_On_Resize(
        unittest.TestCase):
    def setUp(self):
        self._chained_hash_table = SeperateChainHashTable(
            bucket_type_class=BSTHashBucket)
        self._default_initial_capacity = 17
        for i in list_of_strings_used_as_keys_and_values:  #17 entries, exactly the default initial capacity
            self._chained_hash_table[i] = i

    def test_capacity_on_resize(self):
        for i in list_of_strings_used_as_keys_and_values:
            self.assertEqual(self._chained_hash_table.get(key=i), i,
                             'get on existing key failed')
            self.assertEqual(self._chained_hash_table[i], i,
                             'get on existing key failed')
        #the table has resized, but the number of items is unchanged
        self.assertEqual(len(self._chained_hash_table),
                         self._default_initial_capacity,
                         'hash table length does not add up')
        #the capacity, however, has doubled
        self.assertEqual(self._chained_hash_table.current_capacity,
                         self._default_initial_capacity * 2,
                         'hash table capacity does not add up')

    def tearDown(self):
        self._chained_hash_table = None
Example 3
class SeperateChainHashTable_with_BSTHashBucket_Test_With_0_Elements(
        unittest.TestCase):
    def setUp(self):
        self._chained_hash_table = SeperateChainHashTable(
            bucket_type_class=BSTHashBucket)

    def test_get_empty_hash_table(self):
        non_existing_key = "SomeKey"
        self.assertEqual(self._chained_hash_table.get(key=non_existing_key),
                         None, 'get on a non-existing key must return None')
        with self.assertRaises(KeyError):
            self._chained_hash_table[non_existing_key]

    def test_del_empty_hash_table(self):
        non_existing_key = "SomeKey"
        with self.assertRaises(KeyError):
            del self._chained_hash_table[non_existing_key]

    def test_len_empty_hash_table(self):
        self.assertEqual(len(self._chained_hash_table), 0,
                         'Empty hash table length must be 0')

    def test_has_key_empty_hash_table(self):
        non_existing_key = "SomeKey"
        self.assertFalse(
            self._chained_hash_table.has_key(key=non_existing_key),
            'has_key on an empty hash table must be falsy')

    def tearDown(self):
        self._chained_hash_table = None
Example 4
class SeperateChainHashTable_with_BSTHashBucket_Test_Multiple_Resizes(
        unittest.TestCase):
    def setUp(self):
        self._chained_hash_table = SeperateChainHashTable(
            bucket_type_class=BSTHashBucket)
        self._default_initial_capacity = 17
        self._length_upper_limit = 1001  #always choose a multiple of 100, plus 1

    def test_get_and_len_with_a_truck_load_of_resizes(self):

        for i in range(1, self._length_upper_limit):  #inserting 1000 entries forces repeated resizes
            self._chained_hash_table[i] = i

        for i in range(1, self._length_upper_limit):  #verify every key survived the resizes
            self.assertEqual(self._chained_hash_table.get(key=i), i,
                             'get on existing key failed')
            self.assertEqual(self._chained_hash_table[i], i,
                             'get on existing key failed')
        #the table has resized several times, but the item count is unchanged
        self.assertEqual(len(self._chained_hash_table),
                         self._length_upper_limit - 1,
                         'hash table length does not add up')
        #and the capacity has doubled on every resize: a capacity-17 table
        #first resizes at 13 items (0.75 load factor), and repeated doubling
        #gives 17 -> 34 -> 68 -> 136 -> 272 -> 544 -> 1088 -> 2176
        current_capacity_would_be = 2176
        self.assertEqual(self._chained_hash_table.current_capacity,
                         current_capacity_would_be,
                         'hash table capacity does not add up')

    def test_del_then_get_and_len_with_a_truck_load_of_resizes(self):

        for i in range(1, self._length_upper_limit):  #inserting 1000 entries forces repeated resizes
            self._chained_hash_table[i] = i

        rolling_length = len(self._chained_hash_table)
        for i in range(1, self._length_upper_limit):  #delete every key, checking get, indexing, and length each time
            del self._chained_hash_table[i]
            rolling_length = rolling_length - 1
            self.assertEqual(self._chained_hash_table.get(key=i), None,
                             'get on a non-existing key must return None')
            with self.assertRaises(KeyError):
                self._chained_hash_table[i]
            self.assertEqual(len(self._chained_hash_table), rolling_length,
                             'hash table length did not add up after delete')

    def tearDown(self):
        self._chained_hash_table = None
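
The 2176 in the test above follows from the doubling rule. A minimal sketch of that arithmetic, assuming (as the comments in these tests indicate) that the table doubles its capacity whenever the load factor climbs past 0.75:

def expected_capacity(n_items, initial_capacity=17, threshold=0.75):
    #simulate the assumed resize policy: double whenever the item count
    #pushes the load factor past the threshold
    capacity = initial_capacity
    for count in range(1, n_items + 1):
        if count > capacity * threshold:
            capacity *= 2
    return capacity

#17 -> 34 -> 68 -> 136 -> 272 -> 544 -> 1088 -> 2176
assert expected_capacity(1000) == 2176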
Example 5
class SeperateChainHashTable_with_BSTHashBucket_Test_With_Less_Than_Initial_Capacity_Elements_Cause_No_Resize(
        unittest.TestCase):
    '''
    Adding 5 elements to a table whose initial capacity is 17 gives a load
    factor of just 5/17 ~ 0.294, below the 0.75 load-factor threshold that
    triggers a resize.
    '''
    def setUp(self):
        self._chained_hash_table = SeperateChainHashTable(
            bucket_type_class=BSTHashBucket)
        self._number_of_entries = 5  #fewer than the initial capacity of 17
        for i in list_of_strings_used_as_keys_and_values[0:5]:
            self._chained_hash_table[i] = i

    def test_get_and_len_on_less_than_initial_capacity_hash_table(self):
        for i in list_of_strings_used_as_keys_and_values[0:5]:
            self.assertEqual(self._chained_hash_table.get(key=i), i,
                             'get on existing key failed')
            self.assertEqual(self._chained_hash_table[i], i,
                             'get on existing key failed')

        self.assertEqual(
            len(self._chained_hash_table), self._number_of_entries,
            'hash table length must equal the number of inserted entries')

    def test_del_then_get_and_len_on_less_than_initial_capacity_hash_table(
            self):
        rolling_length = self._number_of_entries
        for i in list_of_strings_used_as_keys_and_values[0:5]:
            del self._chained_hash_table[i]
            rolling_length = rolling_length - 1
            self.assertEqual(self._chained_hash_table.get(key=i), None,
                             'get on a non-existing key must return None')
            with self.assertRaises(KeyError):
                self._chained_hash_table[i]
            self.assertEqual(len(self._chained_hash_table), rolling_length,
                             'hash table length did not add up after delete')

    def tearDown(self):
        self._chained_hash_table = None
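
The no-resize claim in the docstring is simple arithmetic: the load factor is the number of stored items divided by the table capacity, so five entries in seventeen slots stay well below the threshold. A quick check of the numbers:

#load factor = items stored / table capacity
items, capacity, threshold = 5, 17, 0.75
assert items / float(capacity) < threshold  #5/17 ~ 0.294, so no resize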
Example 6
class SeperateChainHashTable_with_BSTHashBucket_Test_With_Full_Initial_Capacity_Elements_Forces_Resize(
        unittest.TestCase):
    def setUp(self):
        self._chained_hash_table = SeperateChainHashTable(
            bucket_type_class=BSTHashBucket)
        self._default_initial_capacity = 17
        for i in list_of_strings_used_as_keys_and_values:  #17 entries, exactly the default initial capacity
            self._chained_hash_table[i] = i

    def test_get_and_len_on_full_initial_capacity_hash_table(self):
        for i in list_of_strings_used_as_keys_and_values:
            self.assertEqual(self._chained_hash_table.get(key=i), i,
                             'get on existing key failed')
            self.assertEqual(self._chained_hash_table[i], i,
                             'get on existing key failed')
        #the table has resized, but the number of items is unchanged
        self.assertEqual(len(self._chained_hash_table),
                         self._default_initial_capacity,
                         'hash table length does not add up')
        #the capacity, however, has doubled
        self.assertEqual(self._chained_hash_table.current_capacity,
                         self._default_initial_capacity * 2,
                         'hash table capacity does not add up')

    def test_del_then_get_and_len_on_full_initial_capacity_hash_table(self):
        rolling_length = len(self._chained_hash_table)
        for i in list_of_strings_used_as_keys_and_values:
            del self._chained_hash_table[i]
            rolling_length = rolling_length - 1
            self.assertEqual(self._chained_hash_table.get(key=i), None,
                             'get on a non-existing key must return None')
            with self.assertRaises(KeyError):
                self._chained_hash_table[i]
            self.assertEqual(len(self._chained_hash_table), rolling_length,
                             'hash table length did not add up after delete')

    def tearDown(self):
        self._chained_hash_table = None
Example 7
class InternStore(object):
    '''
    Several objects of a class can share the same internal state, and holding
    all of them in memory is wasteful. InternStore maps objects with the same
    internal state to a single, weakly referenced instance. For this to work,
    every object passed to the intern store must be hashable, and objects with
    the same internal state must hash to the same value.
    '''
    def __init__(self):
        self._store = SeperateChainHashTable()

    def intern(self, obj):
        '''
        obj is checked against the intern store; if an entry with the same
        hash and equal state already exists, that stored instance is returned.
        Otherwise obj is stored (as a weakly referenced value) and returned.
        '''
        if obj is None:
            return None

        class_of_obj = type(obj)
        map_of_class_to_weak_refs_of_obj = self._store.get(class_of_obj,
                                                           default=None)
        if map_of_class_to_weak_refs_of_obj is None:
            map_of_class_to_weak_refs_of_obj = weakref.WeakValueDictionary()
            self._store[class_of_obj] = map_of_class_to_weak_refs_of_obj

        interned_obj = map_of_class_to_weak_refs_of_obj.get(obj,
                                                            default=None)
        if interned_obj is not None:
            #an equal object is already interned; reuse that instance
            return interned_obj
        #first occurrence: record obj in the per-class map and return it
        map_of_class_to_weak_refs_of_obj[obj] = obj
        return obj
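
A sketch of how InternStore might be used. The Point class below is hypothetical, not part of the original code, but it satisfies the stated contract: equality and hashing are derived from internal state.

class Point(object):
    #hypothetical value class: equal coordinates compare and hash equally
    def __init__(self, x, y):
        self.x, self.y = x, y

    def __eq__(self, other):
        return isinstance(other, Point) and \
            (self.x, self.y) == (other.x, other.y)

    def __hash__(self):
        return hash((self.x, self.y))

store = InternStore()
p1 = store.intern(Point(1, 2))
p2 = store.intern(Point(1, 2))  #same internal state as p1
assert p1 is p2  #both names now refer to the single interned instance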
Example 8
class DisjointSetWithUnion(object):
    '''
    A disjoint set DJS holds a number of elements from a fixed universe U,
    partitioned into non-overlapping subsets. For example, with
    U = {a, b, c, d, r, f, g}, DJS can be { {a, g}, {b}, {d}, {c, r, f} }.
    A disjoint set allows 3 operations:

    1) make_set(x): creates a new singleton set {x}
    2) find(x): returns R such that x is in the set represented by R. The set
       to which an item belongs is represented by one of the items in the set,
       usually the one at the root of the uptree that links together all the
       items in the set. For example, if {c, r, f} is represented by c, then
       find(r) = c.
    3) union(s, t): merges the smaller set into the larger,
       e.g. {c} U {r, f} = {c, r, f}
    For more detail, see Data Structures and Their Algorithms by Harry Lewis
    and Larry Denenberg.

    Uptree nodes hold the individual items of the set:

    1) Every item must be known to the set. We use a hash table where
       table[item] = the UpTree node for that item.
    2) make_set(item) creates an uptree node holding item. Each node starts
       off as a tree rooted at itself.
    3) Iterating over the disjoint set yields (item, item's set) tuples.
    '''
    def __init__(self):
        self._table_of_uptrees = SeperateChainHashTable(
            bucket_type_class=SplayedHashBucket)

    @property
    def size(self):
        return len(self._table_of_uptrees)

    def make_set(self, item=None):
        if item is None:
            return None
        uptree_node = UpTreeNode(node_element=item)  #single node tree
        self._table_of_uptrees[item] = uptree_node
        return uptree_node

    def _uptree_find_with_path_compression(self, item=None):
        '''
        Return the root of the uptree to which item belongs. Compress the path before leaving.
        '''
        uptree_node_with_item = self._table_of_uptrees.get(item, None)
        if uptree_node_with_item is None:
            raise KeyError('Key Error: %s ' % repr(item))
        #walk up the parent links to find the root of the uptree
        current_node = uptree_node_with_item
        while current_node.parent_node is not None:
            current_node = current_node.parent_node
        #we have root node
        root_of_uptree = current_node
        #compress path
        current_node = uptree_node_with_item
        parent_of_current_node = current_node.parent_node
        while current_node != root_of_uptree:
            parent_of_current_node = current_node.parent_node
            current_node.parent_node = root_of_uptree
            current_node = parent_of_current_node

        return root_of_uptree

    def find(self, item):
        '''
        returns the set to which this item belongs. The set to which an item belongs is represented by the item at root of the set.
        '''
        try:
            root_node = self._uptree_find_with_path_compression(item)
        except KeyError:
            return None
        return root_node.node_element

    def union(self, item_1=None, item_2=None):
        '''
        Finds the sets for item_1 and item_2, merges the smaller into the
        larger, and returns the representative of the merged set.
        '''
        if item_1 is None or item_2 is None:
            return None

        uptree_1 = self._uptree_find_with_path_compression(
            item=item_1)  #root of item_1's uptree
        uptree_2 = self._uptree_find_with_path_compression(
            item=item_2)  #root of item_2's uptree

        #merge the smaller tree into the larger; return the merged set's representative
        if uptree_1.node_count >= uptree_2.node_count:
            uptree_2.parent_node = uptree_1
            uptree_1.node_count = uptree_1.node_count + uptree_2.node_count
            return uptree_1.node_element
        else:
            uptree_1.parent_node = uptree_2
            uptree_2.node_count = uptree_2.node_count + uptree_1.node_count
            return uptree_2.node_element

    def __iter__(self):
        '''
        Iter on Disjoint set results in yielding (item, item's set) tuples
        '''
        for key in self._table_of_uptrees:
            #hash table entries are uptree nodes
            item_in_table = self._table_of_uptrees[key].node_element
            yield (item_in_table, self.find(item_in_table))
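
A sketch of typical usage for DisjointSetWithUnion, assuming UpTreeNode starts each tree with node_count = 1, as the union logic above expects:

dsu = DisjointSetWithUnion()
for item in ['a', 'b', 'c', 'd']:
    dsu.make_set(item)  #four singleton sets

dsu.union(item_1='a', item_2='b')  #{a, b}, represented by 'a'
dsu.union(item_1='c', item_2='d')  #{c, d}, represented by 'c'
dsu.union(item_1='a', item_2='c')  #equal sizes, so the first root wins

assert dsu.find('d') == dsu.find('b')  #one representative for all four items
assert dsu.size == 4  #four items tracked, regardless of grouping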

if __name__ == '__main__':
    #demo: exercise the chained hash table directly
    cht = SeperateChainHashTable()  #assumed setup; the snippet uses cht without defining it

    #add key, value pairs
    kvpairs = [(5, 5), (2, 2), (7, 7), (1, 1), (3, 3), (9, 9), (8, 8), (4, 4),
               (6, 6)]
    for kvpair in kvpairs:
        cht[kvpair[0]] = kvpair[1]

    print_items_in_hash_table(cht)

    #check if hash table has a key
    key_to_check = 10
    print('Key %s found in hashtable %s' %
          (str(key_to_check), key_to_check in cht))

    #get item with default if not found
    key_to_get = 20
    print('Get value for key %s value is %s' %
          (str(key_to_get), cht.get(key_to_get)))

    #replace
    key_to_replace_at = 1
    new_value = 111
    print('Replacing item at key %s with %s' %
          (str(key_to_replace_at), str(new_value)))
    cht[key_to_replace_at] = new_value

    #delete
    key_to_delete = 7
    print('Deleting key %s' % str(key_to_delete))
    del cht[key_to_delete]
    print_items_in_hash_table(cht)