示例#1
0
 def _bucket_setitem(self, j, k, v):
     if self._table[j] is None:
         self._table[j] = UnsortedTableMap()  # new a bucket
     oldsize = len(self._table[j])
     self._table[j][k] = v
     if oldsize < len(self._table[j]):
         self._n += 1
示例#2
0
 def _bucket_setitem(self, j, k, v):
     if self._table[j] is None:
         self._table[j] = UnsortedTableMap()  # bucket is new to the table
     old_size = len(self._table[j])
     self._table[j][k] = v
     if len(self._table[j]) > old_size:  # key is new to the table
         self._n += 1  # increase overall map size
 def __init__(self, original, preprocessed, typeOfMap ,index=None ):
     self.__original = original
     self.__preprocessed = preprocessed
     self.__typeOfMap = typeOfMap
     if self.__typeOfMap:
         if self.__typeOfMap == 'avl':
             self.__map = AVLTreeMap()
         elif self.__typeOfMap == 'unsorted':
             self.__map = UnsortedTableMap()
         elif self.__typeOfMap == 'sorted':
             self.__map = SortedTableMap()
         elif self.__typeOfMap == 'chain':
             self.__map = ChainHashMap()
         elif self.__typeOfMap == 'probe':
             self.__map = ProbeHashMap()
         elif self.__typeOfMap == 'splay':
             self.__map = SplayTreeMap()
         elif self.__typeOfMap == 'rb':
             self.__map = RedBlackTreeMap()
         elif self.__typeOfMap == 'dict':
             self.__map = dict()
         elif self.__typeOfMap == 'od':
             self.__map = OrderedDict()
     self.__indexFile = index
     self.__stats = [0, 0, 0]
    def _bucket_setitem(self, j, k, v):
        if self._table[j] is None:  # check if the entry  is new
            self._table[j] = UnsortedTableMap()
        oldsize = len(self._table[j])
        self._table[j][k] = v

        if len(self._table) > oldsize:  # key is new entry to the table
            self._n += 1  # increase in the overall map size
 def _bucket_setitem(self, j, k, v):
     if self._table[j] is None:
         self._table[j] = UnsortedTableMap()
     oldsize = len(self._table[j])
     self._table[j][k] = v
     # set has two scenarios: 1 add, 2 update
     # only add will increase n
     if len(self._table[j]) > oldsize:
         self._n += 1
示例#6
0
 def _bucket_setitem(self, j, k, v):
     if self._table[j] is None:
         self._table[j] = UnsortedTableMap()
     #deal with newly added item in bucket and update self._n
     oldsize = len(self._table[j])
     #set newly added item; an update of value with the same key
     #will not affect self._n
     self._table[j][k] = v
     if len(self._table) > oldsize:
         self._n += 1
示例#7
0
    def _set_bucket_item(self, bucket_idx, key, value):
        bucket = self._table[bucket_idx]
        if bucket is None:
            self._table[bucket_idx] = UnsortedTableMap()

        previous_bucket_size = len(self._table[bucket_idx])
        self._table[bucket_idx][key] = value
        after_bucket_size = len(self._table[bucket_idx])
        if after_bucket_size > previous_bucket_size:
            self._number_of_elements += 1
示例#8
0
 def _bucket_setitem(self, j, k, v) -> None:
     """
     Args:
         v (object): The new value to set. 
     """
     if self._table[j] is None:
         self._table[j] = UnsortedTableMap()  # bucket is new to the table
     oldsize = len(self._table[j])
     self._table[j][k] = v
     if len(self._table[j]) > oldsize:  # key was new to the table
         self._n += 1  # increase overall map size
class TestSimpleTable(unittest.TestCase):
    """Basic functionality tests using a simple table."""

    def setUp(self):
        """Give every test its own empty ``UnsortedTableMap``."""
        self.table = UnsortedTableMap()

    def test_init(self):
        """The fixture is a non-None ``UnsortedTableMap`` instance."""
        self.assertIsNotNone(self.table)
        self.assertIsInstance(self.table, UnsortedTableMap)

    def test_setitem_getitem(self):
        """A stored value can be read back under the same key."""
        key = "test key"
        value = "test value"
        self.table[key] = value
        self.assertEqual(self.table[key], value)

    def test_getitem_raises_keyerror(self):
        """Does __getitem__ raise KeyError when the key isn't in the table?"""
        key = "missing"
        with self.assertRaises(KeyError):
            _ = self.table[key]

    def test_setitem_update_existing(self):
        """Assigning to an existing key replaces its value."""
        key = "test"
        self.table[key] = 0
        new_value = 1
        self.table[key] = new_value
        self.assertEqual(self.table[key], new_value)

    def test_delitem(self):
        """Deleting a key removes it and restores the previous length."""
        # unittest assertions are used instead of bare ``assert`` so the
        # checks still run under ``python -O``.
        self.assertEqual(len(self.table), 0)
        key = "delete me"
        self.table[key] = 1
        del self.table[key]
        self.assertEqual(len(self.table), 0)
        with self.assertRaises(KeyError):
            _ = self.table[key]

    def test_delitem_raises_keyerror(self):
        """Does __delitem__ raise KeyError when trying to delete a key-value
        pair that's not in the map?"""
        key = "missing"
        with self.assertRaises(KeyError):
            del self.table[key]

    def test_iter(self):
        """Every key produced by ``keys()`` maps to the value stored for it."""
        items = {"a": 1, "b": 2, "c": 3}
        for key, value in items.items():
            self.table[key] = value
        self.assertEqual(len(self.table), 3)
        for key in self.table.keys():
            self.assertEqual(self.table[key], items[key])
示例#10
0
class Indexer:
    """A class for indexing preprocessed text documents.

    The index maps every word of the preprocessed file to the list of
    1-based line numbers it occurs on.  Searches print the matching lines
    of the original file.  Call ``close()`` (or use the instance as a
    context manager) to release the two file handles opened in ``__init__``.
    """

    # Zero-argument factories instead of ready-made instances: a class-level
    # instance would be shared by every Indexer using the same map type, so
    # their indexes would be mixed together.  The lambdas also postpone the
    # class lookup until a structure is actually requested.
    __structures = {'avl': lambda: AVLTreeMap(),
                    'unsorted': lambda: UnsortedTableMap(),
                    'sorted': lambda: SortedTableMap(),
                    'chain': lambda: ChainHashMap(),
                    'probe': lambda: ProbeHashMap(),
                    'splay': lambda: SplayTreeMap(),
                    'rb': lambda: RedBlackTreeMap(),
                    'dict': lambda: dict(),
                    'od': lambda: OrderedDict()}
    __names = {'avl': 'AVL Tree Map', 'unsorted': 'Unsorted Table Map',
               'sorted': 'Sorted Table Map', 'chain': 'Chain Hash Map',
               'probe': 'Probe Hash Map', 'splay': 'Splay Tree Map',
               'rb': 'Red and Black Tree Map', 'dict': 'Python Dictionary',
               'od': 'Python Ordered Dictionary'}

    def __init__(self, original, preprocessed, indexed=None, map_type='rb'):
        """Open both input files and create an empty map of the chosen type.

        Args:
            original: Path of the original text; its lines are printed by
                searches.
            preprocessed: Path of the preprocessed text that gets indexed.
            indexed: Optional path ``dump()`` writes the index to.
            map_type: Key of the map implementation to use; an unknown key
                falls back to 'avl'.

        Raises:
            OSError: If either input file cannot be opened.
        """
        self.__pre_file = open(preprocessed, 'r', encoding='utf-8-sig')
        try:
            self.__org_file = open(original, 'r', encoding='utf-8-sig')
        except Exception:
            # Do not leak the first handle when the second open fails.
            self.__pre_file.close()
            raise
        self.__map_type = map_type
        self._mapFix(self.__map_type)
        self.__multimap = self.__structures[self.__map_type]()
        self.__average = 0
        self.__median = 0
        self.__indexing_time = 0
        self.__index_out = indexed

    def __enter__(self):
        """Return the indexer itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the input files when the ``with`` block ends."""
        self.close()

    def close(self):
        """Close the preprocessed and original input files."""
        self.__pre_file.close()
        self.__org_file.close()

    def _mapFix(self, map_type):
        """Fall back to the 'avl' map when ``map_type`` is not supported."""
        if map_type not in self.__structures:
            self.__map_type = 'avl'

    def index(self):
        """Read the preprocessed file and index its words.

        Also records the indexing time, the average and the median word
        frequency, and prints the indexing duration.
        """
        initial_time = time()
        total_terms = 0
        for line_num, line in enumerate(self.__pre_file, start=1):
            for word in line.split():
                try:
                    self.__multimap[word].append(line_num)
                except KeyError:
                    # First occurrence of this word.
                    self.__multimap[word] = [line_num]
                total_terms += 1
        self.__indexing_time = time() - initial_time
        print('Indexing duration is {} seconds.'.format(
            round(self.__indexing_time, 4)))
        distinct_terms = len(self.__multimap)
        # An empty input must not raise ZeroDivisionError.
        self.__average = total_terms / distinct_terms if distinct_terms else 0
        self._find_median()

    def dump(self):
        """Write the index to the ``indexed`` file, one word per line.

        Each line is the word followed by its comma-separated line numbers.
        Nothing is written when no output path was given.
        """
        if self.__index_out is None:
            return
        # UTF-8 matches the encoding the inputs are read with, so any word
        # that could be read can also be written.
        with open(self.__index_out, 'w', encoding='utf-8') as out_file:
            for word in self.__multimap:
                lines = ', '.join(str(num) for num in self.__multimap[word])
                out_file.write(word + ' ' + lines + '\n')

    def _find_median(self):
        """Store the median word frequency (0 when nothing is indexed)."""
        frequencies = sorted(
            len(self.__multimap[key]) for key in self.__multimap)
        if frequencies:
            self.__median = frequencies[len(frequencies) // 2]
        else:
            self.__median = 0

    def _search(self, keyword):
        """Print every original line containing ``keyword`` and the timing.

        Raises:
            KeyError: If ``keyword`` is not in the index.
        """
        initial_time = time()
        lines = self.__multimap[keyword]
        search_time = time() - initial_time
        wanted = set(lines)  # O(1) membership test per line of the file
        try:
            for line_num, text in enumerate(self.__org_file, start=1):
                if line_num in wanted:
                    print('{1}: {0}'.format(text.strip(), line_num))
        finally:
            self.__org_file.seek(0)  # resets buffer for next searches
        print('\nIt took {:.12f} seconds to find {} occurrence '
              'of {!r}.'.format(search_time, len(lines), keyword))

    def startUI(self):
        """Prompt for words in a loop and print where each one occurs.

        Only alphabetical words of three or more characters are accepted.
        The loop ends when the user answers the quit prompt with 'y'.
        """
        print('This search is powered by {}.'.format(
            self.__names[self.__map_type]))
        while True:
            try:
                keyword = input('Enter a word to search for: ').lower()
                if len(keyword) < 3 or not keyword.isalpha():
                    raise ValueError()
                self._search(keyword)
            except KeyError:
                print("Sorry! We couldn't find {!r} in "
                      "the file.\n".format(keyword))
            except RecursionError:
                print("Structure recursion limit has exceeded, please try"
                      " another map!")
            except ValueError:
                print('Invalid Term!\n\tOnly alphabetical words with three or'
                      ' more characters are allowed!')
            except Exception:
                # Not a bare ``except`` so Ctrl-C and SystemExit still work.
                print('Error has been occurred!')
            if input("Quit? (y/n): ").lower().startswith('y'):
                break

    def __repr__(self):
        """Return the stats table as text."""
        output = 'Total indexed terms:\t{}\n'.format(len(self.__multimap))
        output += 'Average word frequency:\t{}\n'.format(
            round(self.__average, 2))
        output += 'Median word frequency:\t{}\n'.format(self.__median)
        return output
示例#11
0
 def setUp(self):
     """Create the empty ``UnsortedTableMap`` stored on ``self.table``."""
     fresh_table = UnsortedTableMap()
     self.table = fresh_table