def calc_dgim(x, all_L):
    """Stream row ``x`` of ``all_L`` through a fresh Dgim and return its count.

    A new ``Dgim(100, 0.1)`` is built on every call (presumably a window of
    100 with an error rate of 0.1 -- confirm against the Dgim signature).

    Args:
        x: Index of the row of ``all_L`` whose elements are streamed.
        all_L: Indexable collection of indexable rows.

    Returns:
        The value of ``get_count()`` once every element has been fed in.
    """
    counter = Dgim(100, 0.1)
    # The number of elements consumed comes from the first row, not from
    # row ``x`` itself.
    width = len(all_L[0])
    for column in range(width):
        counter.update(all_L[x][column])
    return counter.get_count()
def test_get_count(self):
    """Feed a 17-element stream into a window of 12 and check the count.

    The last 12 elements of the stream contain exactly 7 ``True`` values,
    which is the count the test expects.
    """
    dgim = Dgim(12)
    stream = [
        False, False, True, False, True, True, True, False, True,
        True, False, False, True, False, True, True, False,
    ]
    for elt in stream:
        dgim.update(elt)
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(7, dgim.get_count())
def test_is_bucket_too_old(self):
    """Check ``_is_bucket_too_old`` for a ``Dgim(10)`` at two clock values.

    With the internal timestamp at 15, bucket timestamp 6 must be kept while
    5 and 16 must be reported as too old; with the timestamp at 5, bucket
    timestamp 16 must be kept and 15 reported as too old.
    """
    window = Dgim(10)
    # (current timestamp, ((bucket timestamp, expected "too old" verdict), ...))
    scenarios = (
        (15, ((6, False), (5, True), (16, True))),
        (5, ((16, False), (15, True))),
    )
    for now, buckets in scenarios:
        window._timestamp = now
        for bucket_timestamp, too_old in buckets:
            check = self.assertTrue if too_old else self.assertFalse
            check(window._is_bucket_too_old(bucket_timestamp))
def DGIM(path1):
    """Build a text report with the DGIM count of every path under ``path1``.

    For each path returned by ``get_directory(path1)`` the data produced by
    ``moving_average(path, 30)`` is converted with ``stream_input`` and fed,
    element by element, into a ``Dgim`` whose ``N`` equals the stream length.

    Args:
        path1: Argument forwarded to ``get_directory``.

    Returns:
        A string starting with ``'DGIM Analysis\\n'`` followed by one line
        per processed path.
    """
    report = ['DGIM Analysis\n']
    for path in get_directory(path1):
        # Drop the last four characters of the base name (presumably a
        # three-letter extension plus the dot -- confirm).
        name = os.path.basename(path)[:-4]
        # The second element of the pair is not used here.
        data, _date = moving_average(path, 30)
        stream = stream_input(data)
        size = len(stream)
        counter = Dgim(N=size)
        for position in range(size):
            counter.update(stream[position])
        report.append(f'There are {counter.get_count()} 1s in {name}\n')
    return ''.join(report)
def test_get_count_without_update(self):
    """Example from chapter 4 of "Mining of Massive Datasets".

    The internal state is assigned by hand, so ``get_count`` is exercised
    without any call to ``update``.
    """
    crt_timestamp = 65
    # Hand-built dgim state. Presumably queue ``i`` holds the timestamps of
    # the buckets of size 2**i, which would give the expected estimate
    # 1 + 1 + 2 + 4 / 2 = 6 -- confirm against the Dgim implementation.
    queues = [
        deque([crt_timestamp - 1, crt_timestamp - 2]),
        deque([crt_timestamp - 4]),
        deque([crt_timestamp - 8]),
        deque(),
    ]
    dgim = Dgim(10)
    dgim._timestamp = crt_timestamp
    dgim._queues = queues
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(6, dgim.get_count())
def test_bucket_drop(self):
    """Check that ``nb_buckets`` shrinks once the oldest bucket expires.

    Starting from a hand-built state with three buckets in a ``Dgim(6)``,
    the first ``update(0)`` must leave three buckets and the second one
    must leave two.
    """
    crt_timestamp = 65
    # Hand-built dgim state: three bucket timestamps spread over the queues.
    queues = [
        deque([crt_timestamp - 1, crt_timestamp - 2]),
        deque([crt_timestamp - 4]),
        deque(),
    ]
    dgim = Dgim(6)
    dgim._timestamp = crt_timestamp
    dgim._queues = queues
    dgim._oldest_bucket_timestamp = crt_timestamp - 4
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(3, dgim.nb_buckets)
    dgim.update(0)
    self.assertEqual(3, dgim.nb_buckets)
    dgim.update(0)
    self.assertEqual(2, dgim.nb_buckets)
def main():
    """Time ``profile_dgim`` on a random stream twice as long as the window.

    Builds a ``Dgim`` with ``N=1000000`` and ``error_rate=0.5``, generates a
    stream of ``2 * N`` elements, prints the wall-clock duration of the
    ``profile_dgim`` call, then forces a garbage collection and sleeps.
    """
    N = 1000000
    error_rate = 0.5
    length = 2 * N
    dgim = Dgim(N=N, error_rate=error_rate)
    stream = generate_random_stream(length=length)
    time_start = time.time()
    profile_dgim(dgim, stream)
    time_stop = time.time()
    # print() with a single argument behaves identically on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print("Took: {}s".format(time_stop - time_start))
    import gc
    gc.collect()
    # NOTE(review): the pause presumably gives an external memory profiler
    # time to sample the process after collection -- confirm.
    time.sleep(5)
def test_r_computation(self):
    """Check the ``_r`` attribute derived from the second constructor argument.

    ``Dgim(10, 0.5)`` must yield ``_r == 2`` and ``Dgim(10, 0.1)`` must
    yield ``_r == 10``.
    """
    # (second constructor argument, expected _r)
    expectations = ((0.5, 2), (0.1, 10))
    for rate, expected_r in expectations:
        sketch = Dgim(10, rate)
        self.assertEqual(expected_r, sketch._r)
def test_only_zeros(self):
    """A stream of 1000 ``False`` values must produce a count of 0."""
    sketch = Dgim(10)
    for _ in range(1000):
        sketch.update(False)
    self.assertEqual(0, sketch.get_count())
def test_N_is_two(self):
    """With ``Dgim(2)``, three successive ``True`` updates count 1, 2, 2."""
    sketch = Dgim(2)
    # Expected count after each successive update(True).
    for expected in (1, 2, 2):
        sketch.update(True)
        self.assertEqual(expected, sketch.get_count())
def test_N_is_one(self):
    """With ``Dgim(1)``, the count follows the most recent element only.

    After ``update(True)`` the count must be 1; after a following
    ``update(False)`` it must be 0.
    """
    sketch = Dgim(1)
    # (element fed in, expected count right after it)
    steps = ((True, 1), (False, 0))
    for bit, expected in steps:
        sketch.update(bit)
        self.assertEqual(expected, sketch.get_count())
def test_N_is_null(self):
    """With ``Dgim(0)``, the count must stay 0 whatever the stream holds."""
    dgim = Dgim(0)
    stream = [True, False, False, True]
    for elt in stream:
        dgim.update(elt)
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(0, dgim.get_count())
def test_count_empty_stream(self):
    """A freshly built ``Dgim(10)`` with no updates must report a count of 0."""
    untouched = Dgim(10)
    count = untouched.get_count()
    self.assertEqual(0, count)