-
Notifications
You must be signed in to change notification settings - Fork 2
/
tagrel.py
107 lines (74 loc) · 3.68 KB
/
tagrel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import sys, os, time
from common import ROOT_PATH
from util import printStatus
from textstore import RecordStore
from simpleknn import simpleknn
# Label passed as the first argument to every printStatus call in this module.
INFO = 'tagrel.tagrel'
class TagrelLearner:
    """Tag relevance learning and image auto-tagging by neighbor voting.

    A query image's visual neighbors are retrieved with a k-NN searcher, and
    each neighbor (at most one per user) votes for the tags it carries. A
    tag's score is its vote count minus the prior returned by
    ``textstore.tagprior(tag, nr_neighbors)``.
    """

    # tpp (tag preprocessing) has to be chosen from {'raw', 'stem', 'lemm'}
    def __init__(self, collection, feature, distance, tpp='lemm', rootpath=ROOT_PATH):
        """Load the feature index and the tag records of ``collection``.

        Args:
            collection: name of the collection directory under ``rootpath``.
            feature: name of the feature sub-directory under ``FeatureData``.
            distance: distance identifier forwarded to the searcher's
                ``set_distance``.
            tpp: tag preprocessing variant; selects the tag file
                ``id.userid.<tpp>tags.txt``.
            rootpath: root directory that holds all collections.
        """
        feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
        id_file = os.path.join(feat_dir, "id.txt")
        feat_file = os.path.join(feat_dir, "feature.bin")

        # shape.txt's first line holds "<nr_of_images> <ndims>". Read it inside
        # a context manager so the file handle is closed deterministically.
        with open(os.path.join(feat_dir, 'shape.txt')) as shape_file:
            nr_of_images, ndims = map(int, shape_file.readline().split())

        self.searcher = simpleknn.load_model(feat_file, ndims, nr_of_images, id_file)
        self.searcher.set_distance(distance)

        tagfile = os.path.join(rootpath, collection, "TextData", "id.userid.%stags.txt" % tpp)
        self.textstore = RecordStore(tagfile)

        self.nr_neighbors = 1000
        self.nr_newtags = 100
        printStatus(INFO, "nr_neighbors=%d, nr_newtags=%d" % (self.nr_neighbors, self.nr_newtags))

    def set_nr_neighbors(self, k):
        """Set the number of neighbors that must cast a vote."""
        self.nr_neighbors = k
        printStatus(INFO, "setting nr_neighbors to %d" % k)

    def set_nr_autotags(self, k):
        """Set the maximum number of tags returned by auto-tagging."""
        self.nr_newtags = k
        printStatus(INFO, "setting nr_autotags to %d" % k)

    def neighbor_voting(self, neighbors, qry_tags, qry_userid):
        """Score tags by letting the neighbors vote.

        Args:
            neighbors: iterable of ``(name, dist)`` pairs; they are consumed
                in the given order and ``dist`` is not used here.
            qry_tags: whitespace-separated tags of the query image, or an
                empty value to request auto-tagging.
            qry_userid: user id of the query image; neighbors with this user
                id never vote.

        Returns:
            A list of ``(tag, score)`` pairs sorted by descending score.
            Without ``qry_tags`` the list holds the voted tags whose score
            exceeds 1e-6, truncated to ``nr_newtags`` entries. With
            ``qry_tags`` it holds one entry per distinct lower-cased query
            tag.

        Raises:
            AssertionError: if fewer than ``nr_neighbors`` neighbors voted.
        """
        # Seeding with the query's own user id keeps that user's other images
        # from voting; with an empty qry_userid, neighbors whose user id is
        # empty are skipped as well.
        users_voted = set([qry_userid])
        tag2vote = {}
        voted = 0
        unlabeled = 0
        thesameuser = 0

        for (name, dist) in neighbors:
            (userid, tags) = self.textstore.lookup(name)
            if not tags:
                unlabeled += 1
                continue
            if userid in users_voted:
                # Each user gets at most one vote.
                thesameuser += 1
                continue
            # A neighbor votes once per distinct tag, however often it repeats it.
            for tag in set(tags.split()):
                tag2vote[tag] = tag2vote.get(tag, 0) + 1
            users_voted.add(userid)
            voted += 1
            if voted >= self.nr_neighbors:
                break

        # NOTE: this check is stripped when Python runs with -O.
        assert (voted >= self.nr_neighbors), 'unlabeled %d, thesameuser %d, voted %d, neighbors %d' % (unlabeled, thesameuser, voted, len(neighbors))

        if not qry_tags:  # no tags given, do image auto-tagging
            autotags = []
            # .items() rather than the Python 2-only .iteritems(), so this
            # runs on both Python 2 and Python 3.
            for tag, vote in tag2vote.items():
                score = vote - self.textstore.tagprior(tag, self.nr_neighbors)
                if score > 1e-6:
                    autotags.append((tag, score))
            autotags.sort(key=lambda v: v[1], reverse=True)
            return autotags[:self.nr_newtags]

        # tag relevance learning
        # Call .split() on the value itself so non-str text (e.g. Python 2
        # unicode) is accepted; str.split(...) would raise TypeError for it.
        qry_tagset = set(qry_tags.lower().split())
        tagvotes = [(tag, tag2vote.get(tag, 0) - self.textstore.tagprior(tag, self.nr_neighbors))
                    for tag in qry_tagset]
        tagvotes.sort(key=lambda v: v[1], reverse=True)
        return tagvotes

    def estimate(self, qry_vec, qry_tags, qry_id="", qry_userid=""):
        """Search the visual neighbors of ``qry_vec`` and let them vote.

        Args:
            qry_vec: query feature vector handed to the k-NN searcher.
            qry_tags: tags of the query image; an empty value triggers
                auto-tagging (see ``neighbor_voting``).
            qry_id: accepted for interface compatibility; not used here.
            qry_userid: user id of the query image, excluded from voting.

        Returns:
            The ``(tag, score)`` list produced by ``neighbor_voting``.
        """
        # Step 1. visual neighbor search
        # Three times nr_neighbors hits are requested; voting skips unlabeled
        # and repeated-user neighbors, so presumably this is headroom for that.
        neighbors = self.searcher.search_knn(qry_vec, max_hits=self.nr_neighbors * 3)
        # Step 2. neighbor voting
        return self.neighbor_voting(neighbors, qry_tags, qry_userid=qry_userid)
if __name__ == '__main__':
    # Manual smoke run: build a learner for the 'train10k' collection using
    # the 'color64' feature and the 'l2' distance.
    tagrel = TagrelLearner(collection='train10k', feature='color64', distance='l2')