def test_apply_blacklist(self):
    """Blacklisted trails come back empty; every other trail keeps its events.

    Builds the TrailDB 'blacklist_testtrail' with five trails of three
    events each, blacklists two of the UUIDs via ``apply_blacklist`` and
    then checks the event count of every trail returned by ``trails()``.
    """
    uuids = [
        "02345678123456781234567812345678",
        "12345678123456781234567812345678",
        "22345678123456781234567812345678",
        "32345678123456781234567812345678",
        "42345678123456781234567812345678",
    ]
    # (timestamp, field1, field2) for the three events added to each trail.
    event_rows = ((1, 'a', '1'), (2, 'b', '2'), (3, 'c', '3'))

    constructor = TrailDBConstructor('blacklist_testtrail',
                                     ['field1', 'field2'])
    for trail_id in uuids:
        for timestamp, first, second in event_rows:
            constructor.add(trail_id, timestamp, [first, second])
    constructor.finalize()

    tdb = TrailDB('blacklist_testtrail')
    blacklist = uuids[1:3]
    tdb.apply_blacklist(blacklist)

    # The full trail listing is materialised before any trail is consumed.
    listing = list(tdb.trails(parsetime=False))
    for trail_uuid, trail_events in listing:
        expected_length = 0 if trail_uuid in blacklist else 3
        self.assertEqual(len(list(trail_events)), expected_length)
def test_crumbs(self):
    """Expect exactly one trail in 'testtrail.tdb'.

    That trail must be keyed by ``self.uuid``, be exposed as a
    ``TrailDBCursor`` and yield three events.
    """
    tdb = TrailDB('testtrail.tdb')

    # Stays 0 when trails() yields nothing, so the final count check fails.
    trail_count = 0
    for trail_count, (trail_uuid, cursor) in enumerate(tdb.trails(), start=1):
        self.assertEqual(self.uuid, trail_uuid)
        self.assertIsInstance(cursor, TrailDBCursor)
        self.assertEqual(3, len(list(cursor)))

    self.assertEqual(1, trail_count)
def test_crumbs(self):
    """Expect exactly one trail in 'testtrail.tdb'.

    That trail must be keyed by ``self.uuid``, be exposed as a
    ``TrailDBCursor`` and yield three events.
    """
    tdb = TrailDB('testtrail.tdb')

    # Stays 0 when trails() yields nothing, so the final count check fails.
    trail_count = 0
    for trail_count, (trail_uuid, cursor) in enumerate(tdb.trails(), start=1):
        self.assertEqual(self.uuid, trail_uuid)
        self.assertIsInstance(cursor, TrailDBCursor)
        self.assertEqual(3, len(list(cursor)))

    self.assertEqual(1, trail_count)
def loading():
    """Tally edits in '/mnt/data/wikipedia-history-small.tdb' and print them.

    An event whose ``user`` is not the empty string counts as a user edit.
    Otherwise, an event whose ``ip`` is not the empty string counts as an
    IP edit. Events where both are empty strings are not counted.
    """
    tdb = TrailDB("/mnt/data/wikipedia-history-small.tdb")
    edits_by_user = 0
    edits_by_ip = 0

    for _, events in tdb.trails():
        for entry in events:
            # ``user`` takes precedence; ``ip`` is only consulted without it.
            if entry.user != "":
                edits_by_user += 1
                continue
            if entry.ip != "":
                edits_by_ip += 1

    print("User edits: {}".format(edits_by_user))
    print("IP edits: {}".format(edits_by_ip))
def test_trails_selected_uuids(self):
    """Trails requested through ``selected_uuids`` each carry three events.

    Builds the TrailDB 'whitelist_testtrail' with five trails of three
    events each, then iterates ``trails(selected_uuids=...)`` for three
    of the UUIDs and checks the event count of every trail returned.
    """
    uuids = [
        "02345678123456781234567812345678",
        "12345678123456781234567812345678",
        "22345678123456781234567812345678",
        "32345678123456781234567812345678",
        "42345678123456781234567812345678",
    ]
    # (timestamp, field1, field2) for the three events added to each trail.
    event_rows = ((1, 'a', '1'), (2, 'b', '2'), (3, 'c', '3'))

    constructor = TrailDBConstructor('whitelist_testtrail',
                                     ['field1', 'field2'])
    for trail_id in uuids:
        for timestamp, first, second in event_rows:
            constructor.add(trail_id, timestamp, [first, second])
    constructor.finalize()

    tdb = TrailDB('whitelist_testtrail')
    whitelist = [uuids[index] for index in (0, 3, 4)]
    expected_length = 3

    selected = tdb.trails(selected_uuids=whitelist)
    for trail_uuid, trail_events in selected:
        event_count = len(list(trail_events))
        self.assertEqual(event_count, expected_length)
def item_top():
    """Return the five most frequent ``title`` items in 'pydata-tutorial'.

    Trails are read with ``rawitems=True`` and the ``title`` attribute of
    every event is counted as-is; only the five most common entries are
    passed through ``tdb.get_item_value``.

    Returns:
        A list of ``(tdb.get_item_value(item), frequency)`` pairs in
        ``Counter.most_common`` order.
    """
    tdb = TrailDB('pydata-tutorial')

    frequencies = Counter()
    for _, trail in tdb.trails(rawitems=True):
        frequencies.update(event.title for event in trail)

    top_five = frequencies.most_common(5)
    return [(tdb.get_item_value(raw), count) for raw, count in top_five]
def string_top():
    """Return the five most frequent event titles in 'pydata-tutorial'.

    Returns:
        The result of ``Counter.most_common(5)``: a list of
        ``(title, frequency)`` pairs, most frequent first.
    """
    tdb = TrailDB('pydata-tutorial')

    title_counts = Counter()
    for _, trail in tdb.trails():
        title_counts.update(event.title for event in trail)

    return title_counts.most_common(5)
def item_top():
    """Return the five most frequent ``title`` items in 'pydata-tutorial'.

    Trails are read with ``rawitems=True`` and the ``title`` attribute of
    every event is counted as-is; only the five most common entries are
    passed through ``tdb.get_item_value``.

    Returns:
        A list of ``(tdb.get_item_value(item), frequency)`` pairs in
        ``Counter.most_common`` order.
    """
    tdb = TrailDB('pydata-tutorial')

    frequencies = Counter()
    for _, trail in tdb.trails(rawitems=True):
        frequencies.update(event.title for event in trail)

    top_five = frequencies.most_common(5)
    return [(tdb.get_item_value(raw), count) for raw, count in top_five]
def string_top():
    """Return the five most frequent event titles in 'pydata-tutorial'.

    Returns:
        The result of ``Counter.most_common(5)``: a list of
        ``(title, frequency)`` pairs, most frequent first.
    """
    tdb = TrailDB('pydata-tutorial')

    title_counts = Counter()
    for _, trail in tdb.trails():
        title_counts.update(event.title for event in trail)

    return title_counts.most_common(5)