-
Notifications
You must be signed in to change notification settings - Fork 0
/
print-duplicates.py
executable file
·32 lines (28 loc) · 1.01 KB
/
print-duplicates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import cPickle as pickle
import merge_metadata
import os
import pymongo
def full_path(coll, id):
    """Return the absolute path of document *id* by following its parents.

    Every document on the way up is fetched from *coll* with ``find_one``,
    projecting only the ``name`` and ``parent`` fields.  The ``name`` values
    are then joined from the outermost ancestor down to the requested
    document, starting from ``os.sep``.  An *id* of ``None`` stands for the
    root and yields ``os.sep`` on its own.
    """
    # Climb from the requested document towards the root, keeping each row.
    lineage = []
    current = id
    while current is not None:
        node = coll.find_one({'_id': current}, {'name': 1, 'parent': 1})
        lineage.append(node)
        current = node['parent']

    # Build the path top-down: root separator first, leaf name last.
    path = os.sep
    for node in reversed(lineage):
        path = os.path.join(path, node['name'])
    return path
if __name__ == '__main__':
db = pymongo.Connection().test
digests = db.mp3_dups.find({'value': {'$gt': 1}},sort=[('value',-1)])
for digest in digests:
dups = db.files.find({'digest': digest['_id']})
print "*** {}: {:.0f}".format(digest['_id'], digest['value'])
dup_dict = {}
for dup in dups:
try:
dup_name = os.path.join(full_path(db.files, dup['parent']),dup['name']).encode('utf-8')
dup_dict[dup_name] = pickle.loads(dup['tags'])
except KeyError:
continue
print '\n'.join(dup_dict.keys())
if dup_dict:
merge_metadata.merge_file_metadata(dup_dict, 'new_files')
print