示例#1
0
    def test_apply_blacklist(self):
        """Check that blacklisted trails yield no events and others keep theirs.

        Builds 'blacklist_testtrail' with five UUIDs carrying three events
        each, blacklists the second and third UUID, and verifies the event
        count of every trail yielded by ``trails()``.
        """
        uuids = [
            "02345678123456781234567812345678",
            "12345678123456781234567812345678",
            "22345678123456781234567812345678",
            "32345678123456781234567812345678",
            "42345678123456781234567812345678",
        ]
        builder = TrailDBConstructor('blacklist_testtrail', ['field1', 'field2'])
        for trail_key in uuids:
            # Timestamps 1..3 carry the values ['a', '1'], ['b', '2'], ['c', '3'].
            for timestamp, (letter, digit) in enumerate(zip('abc', '123'), 1):
                builder.add(trail_key, timestamp, [letter, digit])
        builder.finalize()

        tdb = TrailDB('blacklist_testtrail')
        blacklist = uuids[1:3]
        tdb.apply_blacklist(blacklist)

        # Collect every (uuid, events) pair up front, before any trail is
        # consumed.
        yielded = list(tdb.trails(parsetime=False))

        for trail_uuid, trail_events in yielded:
            expected_length = 0 if trail_uuid in blacklist else 3
            self.assertEqual(len(list(trail_events)), expected_length)
示例#2
0
    def test_crumbs(self):
        """Check 'testtrail.tdb' yields exactly one trail with three events.

        The single trail must be keyed by ``self.uuid`` (set outside this
        method, presumably by the test fixture) and be exposed as a
        ``TrailDBCursor``.
        """
        tdb = TrailDB('testtrail.tdb')

        seen = 0
        for seen, (trail_uuid, cursor) in enumerate(tdb.trails(), 1):
            self.assertEqual(self.uuid, trail_uuid)
            self.assertIsInstance(cursor, TrailDBCursor)
            self.assertEqual(3, len(list(cursor)))

        # `seen` stays 0 when no trail was yielded at all.
        self.assertEqual(1, seen)
示例#3
0
    def test_crumbs(self):
        """Iterate 'testtrail.tdb' and verify its only trail.

        Each yielded trail must match ``self.uuid``, be a ``TrailDBCursor``
        and contain three events; exactly one trail must be yielded overall.
        """
        database = TrailDB('testtrail.tdb')

        visited = []
        for trail_uuid, trail_cursor in database.trails():
            visited.append(trail_uuid)
            self.assertEqual(self.uuid, trail_uuid)
            self.assertIsInstance(trail_cursor, TrailDBCursor)
            events = list(trail_cursor)
            self.assertEqual(3, len(events))

        self.assertEqual(1, len(visited))
示例#4
0
def loading(path="/mnt/data/wikipedia-history-small.tdb"):
    """Count the edits in a TrailDB, split into user edits and IP edits.

    An event whose ``user`` field is non-empty counts as a user edit;
    otherwise, an event whose ``ip`` field is non-empty counts as an IP
    edit. Events with both fields empty are not counted. Both totals are
    printed to standard output; nothing is returned.

    Args:
        path: Location of the TrailDB to open. Defaults to the dataset path
            this function previously had hard-coded, so existing
            zero-argument calls behave as before.
    """
    traildb = TrailDB(path)
    user_edits = 0
    ip_edits = 0

    # The trail's UUID is not needed for the tally.
    for _uuid, trail in traildb.trails():
        for event in trail:
            if event.user != "":
                user_edits += 1
            elif event.ip != "":
                ip_edits += 1

    print("User edits: {}".format(user_edits))
    print("IP edits: {}".format(ip_edits))
示例#5
0
    def test_trails_selected_uuids(self):
        """Check that ``trails(selected_uuids=...)`` yields full trails.

        Builds 'whitelist_testtrail' with five UUIDs carrying three events
        each, then requests the first, fourth and fifth UUID and verifies
        that every yielded trail holds all three events.
        """
        uuids = [
            "02345678123456781234567812345678",
            "12345678123456781234567812345678",
            "22345678123456781234567812345678",
            "32345678123456781234567812345678",
            "42345678123456781234567812345678",
        ]
        builder = TrailDBConstructor('whitelist_testtrail', ['field1', 'field2'])
        for trail_key in uuids:
            # Timestamps 1..3 carry the values ['a', '1'], ['b', '2'], ['c', '3'].
            for timestamp, (letter, digit) in enumerate(zip('abc', '123'), 1):
                builder.add(trail_key, timestamp, [letter, digit])
        builder.finalize()

        tdb = TrailDB('whitelist_testtrail')
        whitelist = [uuids[index] for index in (0, 3, 4)]

        events_per_trail = 3
        for _trail_uuid, trail_events in tdb.trails(selected_uuids=whitelist):
            self.assertEqual(len(list(trail_events)), events_per_trail)
def item_top():
    """Return the five most frequent event titles in 'pydata-tutorial'.

    Titles are tallied in the form yielded by ``trails(rawitems=True)``;
    only the five winners are passed through ``get_item_value`` afterwards.

    Returns:
        A list of ``(get_item_value(title_item), count)`` pairs, most
        frequent first.
    """
    db = TrailDB('pydata-tutorial')

    title_counts = Counter()
    for _uuid, events in db.trails(rawitems=True):
        title_counts.update(event.title for event in events)

    ranked = title_counts.most_common(5)
    return [(db.get_item_value(raw_title), count) for raw_title, count in ranked]
def string_top():
    """Return the five most frequent event titles in 'pydata-tutorial'.

    Returns:
        A list of ``(title, count)`` pairs as produced by
        ``Counter.most_common(5)``, most frequent first.
    """
    db = TrailDB('pydata-tutorial')

    title_counts = Counter()
    for _uuid, events in db.trails():
        title_counts.update(event.title for event in events)

    return title_counts.most_common(5)
示例#8
0
def item_top():
    """Rank event titles of 'pydata-tutorial' by frequency; keep the top five.

    Trails are read with ``rawitems=True`` and the title items are counted
    as yielded; ``get_item_value`` is applied only to the five most common.

    Returns:
        A list of ``(get_item_value(item), frequency)`` pairs, most
        frequent first.
    """
    database = TrailDB('pydata-tutorial')

    frequencies = Counter()
    for _, trail in database.trails(rawitems=True):
        for event in trail:
            frequencies[event.title] += 1

    top_five = []
    for item, hits in frequencies.most_common(5):
        top_five.append((database.get_item_value(item), hits))
    return top_five
示例#9
0
def string_top():
    """Rank event titles of 'pydata-tutorial' by frequency; keep the top five.

    Returns:
        The result of ``Counter.most_common(5)`` over every event's
        ``title``: a list of ``(title, frequency)`` pairs.
    """
    database = TrailDB('pydata-tutorial')

    frequencies = Counter()
    for _, trail in database.trails():
        for event in trail:
            frequencies[event.title] += 1

    return frequencies.most_common(5)