Exemplo n.º 1
0
 def test_cms_remove_mult(self):
     """Remove several occurrences of one key in a single call.

     Both the value returned by ``remove`` and ``elements_added`` are
     expected to drop by the removed amount.
     """
     key = "this is a test"
     sketch = CountMinSketch(width=1000, depth=5)

     # insert 16 occurrences in one call
     inserted = sketch.add(key, 16)
     self.assertEqual(inserted, 16)
     self.assertEqual(sketch.elements_added, 16)

     # take 4 of them back out in one call
     remaining = sketch.remove(key, 4)
     self.assertEqual(remaining, 12)
     self.assertEqual(sketch.elements_added, 12)
Exemplo n.º 2
0
 def test_cms_bytes(self):
     """Compare the md5 digest of ``bytes(sketch)`` with a known value."""
     expected_digest = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
     sketch = CountMinSketch(width=1000, depth=5)
     sketch.add("this is a test", 100)

     # serialize first, then hash the raw bytes
     raw = bytes(sketch)
     actual_digest = hashlib.md5(raw).hexdigest()
     self.assertEqual(actual_digest, expected_digest)
Exemplo n.º 3
0
 def test_cms_remove_mult(self):
     ''' bulk removal: take several occurrences of one key out at once '''
     item = 'this is a test'
     cms = CountMinSketch(width=1000, depth=5)
     # (operation, amount, count expected afterwards)
     steps = ((cms.add, 16, 16), (cms.remove, 4, 12))
     for action, amount, expected in steps:
         self.assertEqual(action(item, amount), expected)
         self.assertEqual(cms.elements_added, expected)
Exemplo n.º 4
0
 def test_cms_remove_single(self):
     ''' remove one occurrence per call and check the count steps down '''
     key = 'this is a test'
     sketch = CountMinSketch(width=1000, depth=5)
     self.assertEqual(sketch.add(key, 4), 4)
     self.assertEqual(sketch.elements_added, 4)
     # remove() is called without an amount; each call should drop one
     for expected in (3, 2):
         self.assertEqual(sketch.remove(key), expected)
     self.assertEqual(sketch.elements_added, 2)
Exemplo n.º 5
0
 def test_cms_remove_single(self):
     ''' two consecutive single removals after adding four occurrences '''
     item = 'this is a test'
     cms = CountMinSketch(width=1000, depth=5)

     starting = cms.add(item, 4)
     self.assertEqual(starting, 4)
     self.assertEqual(cms.elements_added, 4)

     after_first = cms.remove(item)
     self.assertEqual(after_first, 3)
     after_second = cms.remove(item)
     self.assertEqual(after_second, 2)

     # the running total must reflect both removals
     self.assertEqual(cms.elements_added, 2)
Exemplo n.º 6
0
 def test_cms_min_val(self):
     ''' removing more than INT64_T_MAX occurrences must not underflow:
         the per-key count is expected to stop at INT32_T_MIN and the
         running total at INT64_T_MIN '''
     key = 'this is a test'
     overshoot = INT64_T_MAX + 5
     sketch = CountMinSketch(width=1000, depth=5)
     sketch.remove(key, overshoot)

     lowest_count = sketch.check(key)
     self.assertEqual(lowest_count, INT32_T_MIN)
     self.assertEqual(sketch.elements_added, INT64_T_MIN)
Exemplo n.º 7
0
 def test_cms_add_mult(self):
     """Add four occurrences per call and check the accumulated count."""
     key = "this is a test"
     sketch = CountMinSketch(width=1000, depth=5)
     # each add of 4 should return the new cumulative count
     for expected in (4, 8, 12, 16):
         self.assertEqual(sketch.add(key, 4), expected)
     self.assertEqual(sketch.elements_added, 16)
Exemplo n.º 8
0
 def test_cms_remove_single(self):
     """Remove a key one occurrence at a time after adding four."""
     key = "this is a test"
     sketch = CountMinSketch(width=1000, depth=5)

     added = sketch.add(key, 4)
     self.assertEqual(added, 4)
     self.assertEqual(sketch.elements_added, 4)

     # counts expected after the first and second removal: 3, then 2
     for expected in range(3, 1, -1):
         self.assertEqual(sketch.remove(key), expected)

     self.assertEqual(sketch.elements_added, 2)
Exemplo n.º 9
0
 def test_cms_add_mult(self):
     ''' repeated bulk inserts of the same key accumulate '''
     item = 'this is a test'
     cms = CountMinSketch(width=1000, depth=5)
     # 4, 8, 12, 16: the count after each insert of four
     for running_total in range(4, 17, 4):
         self.assertEqual(cms.add(item, 4), running_total)
     self.assertEqual(cms.elements_added, 16)
Exemplo n.º 10
0
 def test_cms_add_single(self):
     ''' insert the same key four times, one occurrence per call '''
     item = 'this is a test'
     cms = CountMinSketch(width=1000, depth=5)
     # add() without an amount should return 1, 2, 3, 4 in turn
     for expected in range(1, 5):
         self.assertEqual(cms.add(item), expected)
     self.assertEqual(cms.elements_added, 4)
Exemplo n.º 11
0
 def test_cms_max_val(self):
     ''' adding more than INT64_T_MAX occurrences must not overflow:
         the per-key count is expected to stop at INT32_T_MAX and the
         running total at INT64_T_MAX '''
     key = 'this is a test'
     overshoot = INT64_T_MAX + 5
     sketch = CountMinSketch(width=1000, depth=5)
     sketch.add(key, overshoot)

     highest_count = sketch.check(key)
     self.assertEqual(highest_count, INT32_T_MAX)
     self.assertEqual(sketch.elements_added, INT64_T_MAX)
Exemplo n.º 12
0
 def test_cms_add_single(self):
     """Insert one occurrence per call and check the returned counts."""
     key = "this is a test"
     sketch = CountMinSketch(width=1000, depth=5)
     expected_counts = (1, 2, 3, 4)
     for expected in expected_counts:
         # no amount is passed, so each call should add exactly one
         self.assertEqual(sketch.add(key), expected)
     self.assertEqual(sketch.elements_added, 4)
Exemplo n.º 13
0
 def test_cms_max_val(self):
     """Check that an oversized add stops at the integer limits.

     After adding ``INT64_T_MAX + 5`` occurrences, the per-key count is
     expected to equal ``INT32_T_MAX`` and ``elements_added`` to equal
     ``INT64_T_MAX``.
     """
     key = "this is a test"
     beyond_limit = INT64_T_MAX + 5
     sketch = CountMinSketch(width=1000, depth=5)
     sketch.add(key, beyond_limit)

     count = sketch.check(key)
     self.assertEqual(count, INT32_T_MAX)
     self.assertEqual(sketch.elements_added, INT64_T_MAX)
Exemplo n.º 14
0
    def test_cms_clear(self):
        ''' clear() should reset both the total and the per-key count '''
        key = 'this is a test'
        sketch = CountMinSketch(width=1000, depth=5)
        inserted = sketch.add(key, 100)
        self.assertEqual(inserted, 100)
        self.assertEqual(sketch.elements_added, 100)

        sketch.clear()
        # nothing should remain after clearing
        self.assertEqual(sketch.elements_added, 0)
        self.assertEqual(sketch.check(key), 0)
Exemplo n.º 15
0
    def test_cms_different_hash(self):
        ''' a sketch given another hash function hashes a key differently '''
        key = 'this is a test'
        default_hashes = CountMinSketch(width=1000, depth=5).hashes(key)

        custom = CountMinSketch(width=1000, depth=5,
                                hash_function=different_hash)
        custom_hashes = custom.hashes(key)
        self.assertNotEqual(default_hashes, custom_hashes)
Exemplo n.º 16
0
 def test_cms_set_query_type(self):
     """Assign several query types and check what the sketch reports back."""
     sketch = CountMinSketch(width=1000, depth=5)
     # a fresh sketch is expected to report "min"
     self.assertEqual(sketch.query_type, "min")

     # (value assigned, value expected afterwards); an unrecognised
     # value is expected to read back as "min"
     transitions = (
         ("mean-min", "mean-min"),
         ("mean", "mean"),
         ("unknown", "min"),
     )
     for requested, expected in transitions:
         sketch.query_type = requested
         self.assertEqual(sketch.query_type, expected)
Exemplo n.º 17
0
 def test_cms_set_query_type(self):
     ''' switch between query types, including an unrecognised one '''
     cms = CountMinSketch(width=1000, depth=5)
     self.assertEqual(cms.query_type, 'min')

     # recognised names should read back unchanged
     for name in ('mean-min', 'mean'):
         cms.query_type = name
         self.assertEqual(cms.query_type, name)

     # an unrecognised name is expected to read back as 'min'
     cms.query_type = 'unknown'
     self.assertEqual(cms.query_type, 'min')
Exemplo n.º 18
0
 def test_cms_set_query_type(self):
     ''' query_type setter: valid names stick, an unknown name gives 'min' '''
     sketch = CountMinSketch(width=1000, depth=5)
     initial = sketch.query_type
     self.assertEqual(initial, 'min')

     expectations = [('mean-min', 'mean-min'),
                     ('mean', 'mean'),
                     ('unknown', 'min')]
     for assigned, reported in expectations:
         sketch.query_type = assigned
         self.assertEqual(sketch.query_type, reported)
Exemplo n.º 19
0
    def test_cms_join_invalid(self):
        """Test joining a count-min sketch with an invalid type.

        Passing an ``int`` to ``join`` must raise ``TypeError`` whose text
        names the offending type.
        """
        cms = CountMinSketch(width=1000, depth=5)
        msg = "Unable to merge a count-min sketch with <class 'int'>"

        # assertRaises reports "TypeError not raised" if join() accepts the
        # int, instead of the opaque "True != False" of a manual else-branch
        with self.assertRaises(TypeError) as ctx:
            cms.join(1)
        self.assertEqual(str(ctx.exception), msg)
Exemplo n.º 20
0
 def test_cms_str(self):
     ''' str() of a sketch should list its parameters and element total '''
     sketch = CountMinSketch(width=1000, depth=5)
     self.assertEqual(sketch.add('this is a test', 100), 100)
     # one entry per output line; detail lines are tab-indented
     expected_lines = [
         'Count-Min Sketch:',
         '\tWidth: 1000',
         '\tDepth: 5',
         '\tConfidence: 0.96875',
         '\tError Rate: 0.002',
         '\tElements Added: 100',
     ]
     self.assertEqual(str(sketch), '\n'.join(expected_lines))
Exemplo n.º 21
0
    def test_cms_export(self):
        ''' test exporting a count-min sketch

            The sketch is written to a scratch file whose md5 digest is
            compared with a known value. '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        cms.add('this is a test', 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
        finally:
            # always remove the scratch file, even when exporting or
            # hashing raises; skip if export never created it
            if os.path.exists(filename):
                os.remove(filename)

        self.assertEqual(md5_out, md5_val)
Exemplo n.º 22
0
    def test_cms_frombytes(self):
        """Round-trip a sketch through ``bytes()`` and ``frombytes()``."""
        key = "this is a test"
        original = CountMinSketch(width=1000, depth=5)
        original.add(key, 100)
        serialized = bytes(original)

        restored = CountMinSketch.frombytes(serialized)
        # the copy must serialize identically and keep shape and count
        self.assertEqual(bytes(restored), bytes(original))
        self.assertEqual(restored.width, 1000)
        self.assertEqual(restored.depth, 5)
        self.assertEqual(restored.check(key), 100)
Exemplo n.º 23
0
 def test_cms_str(self):
     ''' check the text produced by str() for a populated sketch '''
     cms = CountMinSketch(width=1000, depth=5)
     added = cms.add('this is a test', 100)
     self.assertEqual(added, 100)

     header = 'Count-Min Sketch:'
     details = ('Width: 1000', 'Depth: 5', 'Confidence: 0.96875',
                'Error Rate: 0.002', 'Elements Added: 100')
     # every detail line sits on its own line, prefixed with a tab
     expected = header + ''.join('\n\t' + line for line in details)
     self.assertEqual(str(cms), expected)
Exemplo n.º 24
0
    def test_cms_join_mismatch_depth(self):
        """Test joining count-min sketches whose depths differ.

        ``join`` must raise ``CountMinSketchError`` carrying the mismatch
        message.
        """
        cms1 = CountMinSketch(width=1000, depth=5)
        cms2 = CountMinSketch(width=1000, depth=4)
        msg = "Unable to merge as the count-min sketches are mismatched"

        # assertRaises gives a descriptive failure if no exception is raised,
        # unlike the previous assertEqual(True, False) else-branch
        with self.assertRaises(CountMinSketchError) as ctx:
            cms1.join(cms2)
        self.assertEqual(ctx.exception.message, msg)
Exemplo n.º 25
0
    def test_cms_check_min(self):
        """Add four keys with distinct counts and read each one back."""
        sketch = CountMinSketch(width=1000, depth=5)
        samples = [
            ("this is a test", 255),
            ("this is another test", 189),
            ("this is also a test", 16),
            ("this is something to test", 5),
        ]
        for key, count in samples:
            self.assertEqual(sketch.add(key, count), count)

        # read the keys back in the opposite order to insertion
        for key, count in reversed(samples):
            self.assertEqual(sketch.check(key), count)
        total = sum(count for _, count in reversed(samples))
        self.assertEqual(sketch.elements_added, total)
Exemplo n.º 26
0
 def __init__(self, size):
     """Create the cache state with ``size`` pre-filled slots.

     size: number of entries placed in ``self.cache``.
     """
     self.phase = 1
     self.round = 1
     self.size = size
     self.clean_counter = 0
     self.clean_set = []
     # start with `size` default entries, each slot getting its own Node
     self.cache = [Node('-', RequestFile(0, 'txt', False)) for _ in range(size)]
     self.model = train_model()
     self.cms = CountMinSketch(width=1000, depth=5)
     self.hashtable = {}  # single bucket for heavy items
     self.miss_count = 0
Exemplo n.º 27
0
    def test_cms_check_mean_called(self):
        ''' set query_type to 'mean', add four keys, read each count back '''
        sketch = CountMinSketch(width=1000, depth=5)
        sketch.query_type = 'mean'
        samples = (
            ('this is a test', 255),
            ('this is another test', 189),
            ('this is also a test', 16),
            ('this is something to test', 5),
        )
        for key, count in samples:
            self.assertEqual(sketch.add(key, count), count)

        # look the keys up last-inserted first
        for key, count in samples[::-1]:
            self.assertEqual(sketch.check(key), count)
        expected_total = sum(count for _, count in samples[::-1])
        self.assertEqual(sketch.elements_added, expected_total)
Exemplo n.º 28
0
    def test_cms_load_diff_hash(self):
        """Load an exported sketch from file while supplying a different hash function."""
        key = "this is a test"
        expected_md5 = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
        with NamedTemporaryFile(dir=os.getcwd(), suffix=".cms", delete=DELETE_TEMP_FILES) as tmp:
            path = tmp.name
            original = CountMinSketch(width=1000, depth=5)
            self.assertEqual(original.add(key, 100), 100)
            original.export(path)
            self.assertEqual(calc_file_md5(path), expected_md5)

            reloaded = CountMinSketch(filepath=path, hash_function=different_hash)
            self.assertEqual(reloaded.elements_added, 100)
            # NOTE(review): this queries the original sketch and compares a
            # count with True, so it looks like it passes trivially; it was
            # presumably meant to query the reloaded sketch - confirm
            self.assertNotEqual(original.check(key), True)
            # the two sketches must disagree on the hashes for the key
            original_hashes = original.hashes(key)
            self.assertNotEqual(original_hashes, reloaded.hashes(key))
Exemplo n.º 29
0
    def test_cms_load_diff_hash(self):
        ''' test loading a count-min sketch from file with a different
            hash function than the one used to build it '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        self.assertEqual(cms.add('this is a test', 100), 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            cms2 = CountMinSketch(filepath=filename,
                                  hash_function=different_hash)
            self.assertEqual(cms2.elements_added, 100)
            # should not work since it is a different hash
            # NOTE(review): this queries `cms` (not `cms2`) and compares a
            # count with True, so it looks like it passes trivially - confirm
            self.assertNotEqual(cms.check('this is a test'), True)
            self.assertNotEqual(cms.hashes('this is a test'),
                                cms2.hashes('this is a test'))
        finally:
            # remove the scratch file even when an assertion above fails;
            # skip if export never created it
            if os.path.exists(filename):
                os.remove(filename)
Exemplo n.º 30
0
    def test_cms_mismatch_hash_function(self):
        """Test joining when hash functions do not match.

        ``join`` must raise ``CountMinSketchError`` carrying the mismatch
        message.
        """
        cms1 = CountMinSketch(width=1000, depth=5)
        cms2 = CountMinSketch(width=1000, depth=5, hash_function=different_hash)
        msg = "Unable to merge as the count-min sketches are mismatched"

        # one guarded call checks both the exception type and its message,
        # replacing the earlier runner() + try/except/else double check
        with self.assertRaises(CountMinSketchError) as ctx:
            cms1.join(cms2)
        self.assertEqual(ctx.exception.message, msg)
Exemplo n.º 31
0
 def test_cms_init_wd(self):
     """Build a sketch from width and depth and check its reported attributes."""
     sketch = CountMinSketch(width=1000, depth=5)
     # (attribute name, expected value), checked in this order
     expectations = (
         ("width", 1000),
         ("depth", 5),
         ("confidence", 0.96875),
         ("error_rate", 0.002),
         ("elements_added", 0),
     )
     for attribute, expected in expectations:
         self.assertEqual(getattr(sketch, attribute), expected)
Exemplo n.º 32
0
 def test_cms_init_ce(self):
     """Build a sketch from confidence and error rate.

     The width and depth it reports are expected to be 1000 and 5.
     """
     sketch = CountMinSketch(confidence=0.96875, error_rate=0.002)

     # dimensions reported for the requested confidence / error rate
     dimensions = (sketch.width, sketch.depth)
     self.assertEqual(dimensions[0], 1000)
     self.assertEqual(dimensions[1], 5)

     # the requested parameters should read back unchanged
     self.assertEqual(sketch.confidence, 0.96875)
     self.assertEqual(sketch.error_rate, 0.002)
     self.assertEqual(sketch.elements_added, 0)
Exemplo n.º 33
0
    def test_cms_load(self):
        """Export a sketch to a temporary file and load it back by path."""
        key = "this is a test"
        expected_md5 = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
        with NamedTemporaryFile(dir=os.getcwd(), suffix=".cms", delete=DELETE_TEMP_FILES) as tmp:
            path = tmp.name
            source = CountMinSketch(width=1000, depth=5)
            self.assertEqual(source.add(key, 100), 100)
            source.export(path)
            self.assertEqual(calc_file_md5(path), expected_md5)

            # construct a second sketch straight from the exported file
            loaded = CountMinSketch(filepath=path)
            self.assertEqual(loaded.elements_added, 100)
            self.assertEqual(loaded.check(key), 100)
Exemplo n.º 34
0
    def test_cms_join_mixed_types(self):
        """Join count-min, count-mean and count-mean-min sketches with each other."""
        first, second, third = (
            "this is a test",
            "this is another test",
            "this is yet another test",
        )
        cms = CountMinSketch(width=1000, depth=5)
        cmeans = CountMeanSketch(width=1000, depth=5)
        cmms = CountMeanMinSketch(width=1000, depth=5)

        cms.add(first, 500)
        cmeans.add(second, 500)
        cmms.add(third, 500)

        # (target, sketch joined into it, membership expected afterwards)
        scenarios = (
            (cms, cmeans, ((first, True), (second, True), (third, False))),
            (cmeans, cmms, ((first, False), (second, True), (third, True), ("foobar", False))),
            (cmms, cms, ((first, True), (second, True), (third, True), ("this is yet another test!", False))),
        )
        for target, other, memberships in scenarios:
            target.join(other)
            for key, present in memberships:
                verify = self.assertTrue if present else self.assertFalse
                verify(key in target)
Exemplo n.º 35
0
 def test_cms_init_error_msg(self):
     ''' Test count-min sketch initialization without enough params

         Passing only a width must raise InitializationError with the
         full usage message. '''
     msg = ('Must provide one of the following to initialize the '
            'Count-Min Sketch:\n'
            '    A file to load,\n'
            '    The width and depth,\n'
            '    OR confidence and error rate')

     # assertRaises gives a descriptive failure if nothing is raised,
     # unlike the previous assertEqual(True, False) else-branch
     with self.assertRaises(InitializationError) as ctx:
         CountMinSketch(width=1000)
     self.assertEqual(str(ctx.exception), msg)
Exemplo n.º 36
0
    def test_cms_different_hash(self):
        """Check that a sketch given another hash function hashes a key differently."""
        key = "this is a test"
        baseline = CountMinSketch(width=1000, depth=5)
        baseline_hashes = baseline.hashes(key)

        alternate = CountMinSketch(width=1000, depth=5, hash_function=different_hash)
        alternate_hashes = alternate.hashes(key)

        self.assertNotEqual(baseline_hashes, alternate_hashes)
Exemplo n.º 37
0
    def test_cms_load(self):
        ''' test loading a count-min sketch from file

            The sketch is exported to a scratch file, the file's md5 is
            checked, and a second sketch is built from that file. '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        self.assertEqual(cms.add('this is a test', 100), 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            # try loading directly from the exported file
            cms2 = CountMinSketch(filepath=filename)
            self.assertEqual(cms2.elements_added, 100)
            self.assertEqual(cms2.check('this is a test'), 100)
        finally:
            # remove the scratch file even when an assertion above fails;
            # skip if export never created it
            if os.path.exists(filename):
                os.remove(filename)
Exemplo n.º 38
0
    def test_cms_clear(self):
        ''' after clear() the sketch should report no elements at all '''
        item = 'this is a test'
        cms = CountMinSketch(width=1000, depth=5)

        # populate, and confirm the population took effect
        count_after_add = cms.add(item, 100)
        self.assertEqual(count_after_add, 100)
        self.assertEqual(cms.elements_added, 100)

        cms.clear()

        self.assertEqual(cms.elements_added, 0)
        count_after_clear = cms.check(item)
        self.assertEqual(count_after_clear, 0)
Exemplo n.º 39
0
 def test_cms_export(self):
     """Export a sketch to a temporary file and check the file's md5 digest."""
     expected_md5 = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
     with NamedTemporaryFile(dir=os.getcwd(), suffix=".cms", delete=DELETE_TEMP_FILES) as tmp:
         path = tmp.name
         sketch = CountMinSketch(width=1000, depth=5)
         sketch.add("this is a test", 100)
         sketch.export(path)
         # hash while the temporary file is still open
         actual_md5 = calc_file_md5(path)
     self.assertEqual(actual_md5, expected_md5)
Exemplo n.º 40
0
    def test_cms_clear(self):
        """Check that ``clear()`` resets the total and the per-key count."""
        key = "this is a test"
        sketch = CountMinSketch(width=1000, depth=5)
        self.assertEqual(sketch.add(key, 100), 100)
        total_before = sketch.elements_added
        self.assertEqual(total_before, 100)

        sketch.clear()

        # both the running total and the key's count should be zero
        total_after = sketch.elements_added
        self.assertEqual(total_after, 0)
        self.assertEqual(sketch.check(key), 0)
Exemplo n.º 41
0
    def test_cms_load_diff_hash(self):
        ''' test loading a count-min sketch from file with a different
            hash function than the one used to build it '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        self.assertEqual(cms.add('this is a test', 100), 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            cms2 = CountMinSketch(filepath=filename,
                                  hash_function=different_hash)
            self.assertEqual(cms2.elements_added, 100)
            # should not work since it is a different hash
            # NOTE(review): this queries `cms` (not `cms2`) and compares a
            # count with True, so it looks like it passes trivially - confirm
            self.assertNotEqual(cms.check('this is a test'), True)
            self.assertNotEqual(cms.hashes('this is a test'),
                                cms2.hashes('this is a test'))
        finally:
            # remove the scratch file even when an assertion above fails;
            # skip if export never created it
            if os.path.exists(filename):
                os.remove(filename)
Exemplo n.º 42
0
    def test_cms_export(self):
        ''' test exporting a count-min sketch

            The sketch is written to a scratch file whose md5 digest is
            compared with a known value. '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        cms.add('this is a test', 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
        finally:
            # always remove the scratch file, even when exporting or
            # hashing raises; skip if export never created it
            if os.path.exists(filename):
                os.remove(filename)

        self.assertEqual(md5_out, md5_val)
Exemplo n.º 43
0
    def test_cms_load(self):
        ''' test loading a count-min sketch from file

            The sketch is exported to a scratch file, the file's md5 is
            checked, and a second sketch is built from that file. '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        cms = CountMinSketch(width=1000, depth=5)
        self.assertEqual(cms.add('this is a test', 100), 100)
        try:
            cms.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            # try loading directly from the exported file
            cms2 = CountMinSketch(filepath=filename)
            self.assertEqual(cms2.elements_added, 100)
            self.assertEqual(cms2.check('this is a test'), 100)
        finally:
            # remove the scratch file even when an assertion above fails;
            # skip if export never created it
            if os.path.exists(filename):
                os.remove(filename)