def process(options, collection, annotationName, simdir, resultfile):
    """Gather per-concept ranking scores into one matrix and pickle it.

    For every concept of ``annotationName`` the ranking file
    ``<simdir>/<concept>.txt`` is read and each (image, score) pair is
    written into that concept's column of an ``(image_num, concept_num)``
    array. Cells that no ranking file fills keep the value -1e4.

    The pickle holds a dict with the keys ``'concepts'``, ``'id_images'``
    (image ids converted with ``int``) and ``'scores'``.

    Args:
        options: object exposing ``rootpath`` and ``overwrite``.
        collection: collection name; also passed as the image-set name.
        annotationName: annotation whose concept list is dumped.
        simdir: directory holding one ``<concept>.txt`` file per concept.
        resultfile: path of the pickle file to write.

    Returns:
        0 when ``checkToSkip`` reports that the result should be skipped,
        otherwise None.

    Raises:
        KeyError: if a ranking file names an image missing from the image set.
    """
    rootpath = options.rootpath
    if checkToSkip(resultfile, options.overwrite):
        return 0

    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    concept_num = len(concepts)
    id_images = readImageSet(collection, collection, rootpath)
    image_num = len(id_images)
    im2index = dict(zip(id_images, range(image_num)))
    print('%d instances, %d concepts to dump -> %s' % (image_num, concept_num, resultfile))

    # -1e4 marks (image, concept) cells for which no score was read.
    scores = np.zeros((image_num, concept_num)) - 1e4
    for c_id, concept in enumerate(concepts):
        simfile = os.path.join(simdir, '%s.txt' % concept)
        for im, score in readRankingResults(simfile):
            scores[im2index[im], c_id] = score

    # Build the payload before opening the file so a conversion error does
    # not leave an empty result file behind. The ids are materialized as a
    # list because a lazy ``map`` object cannot be pickled on Python 3.
    payload = {
        'concepts': concepts,
        'id_images': [int(x) for x in id_images],
        'scores': scores,
    }
    makedirsforfile(resultfile)
    with open(resultfile, 'wb') as output:
        pickle.dump(payload, output, -1)
def process(options, collection, annotationName, simdir, resultfile):
    """Dump the scores of all concepts of a collection into a single pickle.

    Each concept of ``annotationName`` has a ranking file
    ``<simdir>/<concept>.txt``; its (image, score) pairs fill one column of
    an ``(image_num, concept_num)`` array whose remaining cells stay at -1e4.
    The array is pickled together with the concept list and the image ids
    (converted with ``int``) under the keys ``'scores'``, ``'concepts'`` and
    ``'id_images'``.

    Args:
        options: object exposing ``rootpath`` and ``overwrite``.
        collection: collection name; also passed as the image-set name.
        annotationName: annotation whose concept list is dumped.
        simdir: directory holding one ``<concept>.txt`` file per concept.
        resultfile: path of the pickle file to write.

    Returns:
        0 when ``checkToSkip`` reports that the result should be skipped,
        otherwise None.

    Raises:
        KeyError: if a ranking file names an image missing from the image set.
    """
    if checkToSkip(resultfile, options.overwrite):
        return 0

    rootpath = options.rootpath
    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    concept_num = len(concepts)
    id_images = readImageSet(collection, collection, rootpath)
    image_num = len(id_images)
    im2index = dict(zip(id_images, range(image_num)))
    print('%d instances, %d concepts to dump -> %s' %
          (image_num, concept_num, resultfile))

    # Cells never assigned below keep the filler value -1e4.
    scores = np.zeros((image_num, concept_num)) - 1e4
    for c_id, concept in enumerate(concepts):
        simfile = os.path.join(simdir, '%s.txt' % concept)
        ranklist = readRankingResults(simfile)
        for im, score in ranklist:
            scores[im2index[im], c_id] = score

    # ``map`` is lazy on Python 3 and cannot be pickled, so build a real list.
    image_ids = [int(x) for x in id_images]

    makedirsforfile(resultfile)
    # The context manager closes the file even if pickling fails.
    with open(resultfile, 'wb') as output:
        pickle.dump(
            {
                'concepts': concepts,
                'id_images': image_ids,
                'scores': scores,
            }, output, -1)
def precompute(self, concept):
    """Load the candidate positive examples for ``concept``.

    Reads ``<self.datadir>/<concept>.txt`` with ``readRankingResults`` and
    stores the first field of every entry, in file order, in
    ``self.candidateset``. ``self.target`` is then set to ``concept``.
    """
    print("[%s] precomputing candidate positive examples for %s" % (self.name, concept))
    ranking_path = os.path.join(self.datadir, '%s.txt' % concept)

    candidates = []
    for entry in readRankingResults(ranking_path):
        candidates.append(entry[0])

    self.candidateset = candidates
    self.target = concept
def _get_neighbors(self, content, context):
    """Return the precomputed neighbours of a test image, optionally with noise.

    The neighbour file is looked up at
    ``<rootpath>/<testCollection>/SimilarityIndex/<testCollection>/<knndir>/<last two chars of testid>/<testid>.txt``
    and truncated to the first ``self.k`` entries.

    When ``self.noise`` exceeds 1e-3, ``int(len(knn) * self.noise)`` randomly
    chosen positions are overwritten with ``(image, 1000)``, where the images
    are sampled from ``self.imset``.

    Args:
        content: not used by this method.
        context: string of the form ``"<testCollection>,<testid>"``.

    Returns:
        The (possibly perturbed) list of neighbour entries.

    Raises:
        ValueError: if ``context`` does not split into exactly two parts, or
            if ``random.sample`` is asked for more items than are available.
    """
    testCollection, testid = context.split(',')
    knnfile = os.path.join(self.rootpath, testCollection, 'SimilarityIndex',
                           testCollection, self.knndir, testid[-2:],
                           '%s.txt' % testid)
    knn = readRankingResults(knnfile)[:self.k]

    if self.noise > 1e-3:
        n = int(len(knn) * self.noise)
        # ``range`` exists on Python 2 and 3 alike, unlike ``xrange``.
        # The two sample calls stay in this order so the random stream is
        # consumed exactly as before.
        hits = random.sample(range(len(knn)), n)
        random_set = random.sample(self.imset, n)
        for idx, replacement in zip(hits, random_set):
            knn[idx] = (replacement, 1000)
    return knn
def GET(self):
    """Handle a search request and render the index page.

    Reads the ``query`` request parameter and fills a response dict:

    * no query: the response keeps its defaults (``status`` 0);
    * all-digit query: treated as a request for one specific image;
    * otherwise: the query is used as a concept name. Its ranking comes
      from the labeled image set when the ``tagrel`` parameter equals
      ``'0'``, else from ``<simdir>/<query>.txt``. Each hit is coloured
      ``'white'`` when it has no label, ``'Chartreuse'`` when its label is
      positive and ``'red'`` otherwise, and at most ``max_hits`` hits are
      kept in ``resp['tagrel']``.

    ``metric``, ``collection``, ``annotationName``, ``simdir``, ``rootpath``,
    ``scorer``, ``max_hits``, ``render`` and ``web`` are names defined
    outside this method.

    Returns:
        The result of ``render.index(resp)``.
    """
    params = web.input(query=None)
    resp = {
        'status': 0,
        'hits': 0,
        'random': [],
        'tagrel': [],
        'metric': metric,
        'perf': 0
    }

    if params.query:
        resp['status'] = 1
        resp['query'] = params.query
        query = params.query.lower()

        if query.isdigit():  # request to view a specific image
            resp['hits'] = 1
            resp['tagrel'] = [{'id': query}]
            return render.index(resp)

        # Ground-truth labels are optional: without them every hit is drawn
        # in white and no performance score is computed.
        try:
            names, labels = readAnnotationsFrom(collection, annotationName, query)
            name2label = dict(zip(names, labels))
        except Exception:
            name2label = {}

        content = []
        # Best effort: on any failure the response keeps whatever was filled
        # in so far. ``except Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt/SystemExit propagate.
        try:
            if params.tagrel == '0':
                labeled = readLabeledImageSet(collection, query, rootpath=rootpath)
                ranklist = [(x, 0) for x in labeled]
            else:
                # NOTE(review): ``query`` comes straight from the request and
                # is interpolated into a file path; confirm it is validated
                # or restricted to known concept names.
                simfile = os.path.join(simdir, '%s.txt' % query)
                ranklist = readRankingResults(simfile)
            resp['hits'] = len(ranklist)

            for name, score in ranklist:
                if name not in name2label:
                    color = 'white'
                elif name2label[name] > 0:
                    color = 'Chartreuse'
                else:
                    color = 'red'
                content.append({'id': name, 'color': color})

            if name2label:
                resp['perf'] = scorer.score(
                    [name2label[x[0]] for x in ranklist if x[0] in name2label])
            else:
                resp['perf'] = 0
            resp['tagrel'] = content[:max_hits]
        except Exception:
            pass

    # Render the populated response; without this return the dict built
    # above would be discarded on every path except the single-image one.
    return render.index(resp)
def precompute_annotator(self, concept):
    """Extend ``self.annotator`` with tags related to ``concept``.

    After delegating to ``NegativeEngine.precompute_annotator``, the concept
    is split on ``'-'``. Each sub-concept contributes itself, the tags
    returned by ``wn_expand`` and, when its ranked-tag file
    ``<ROOT_PATH>/<collection>/SimilarityIndex/ngd/<subconcept>.txt`` can be
    loaded, the first field of its top 100 entries.

    Loading the ranked-tag file is best effort: a failure is reported via
    ``printError`` and the sub-concept falls back to the first two sources.

    Args:
        concept: concept name, possibly several sub-concepts joined by '-'.
    """
    INFO = 'dataengine.%s.precompute_annotator' % self.__class__.__name__
    topn = 100
    NegativeEngine.precompute_annotator(self, concept)

    for subconcept in concept.split('-'):
        expandedTagSet = set([subconcept] + wn_expand(subconcept))
        try:
            datafile = os.path.join(ROOT_PATH, self.collection,
                                    'SimilarityIndex', 'ngd',
                                    '%s.txt' % subconcept)
            rankedtags = readRankingResults(datafile)
            expandedTagSet = expandedTagSet.union(
                set(x[0] for x in rankedtags[:topn]))
        except Exception:
            # Narrower than a bare ``except`` so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            printError(INFO, 'failed to load ranktag file for %s' % subconcept)
        self.annotator = self.annotator.union(expandedTagSet)

    printStatus(INFO, 'precomputing the virtual annotator for %s: %d tags' %
                (concept, len(self.annotator)))
def _get_neighbors(self, content, context):
    """Load the k nearest neighbours of a test image, optionally adding noise.

    The neighbour file is read from
    ``<rootpath>/<testCollection>/SimilarityIndex/<testCollection>/<knndir>/<last two chars of testid>/<testid>.txt``
    and only its first ``self.k`` entries are kept.

    When ``self.noise`` exceeds 1e-3, ``int(len(knn) * self.noise)`` randomly
    chosen positions are replaced by ``(image, 1000)``, the images being
    sampled from ``self.imset``.

    Args:
        content: not used by this method.
        context: string of the form ``"<testCollection>,<testid>"``.

    Returns:
        The (possibly perturbed) list of neighbour entries.

    Raises:
        ValueError: if ``context`` does not split into exactly two parts, or
            if ``random.sample`` is asked for more items than are available.
    """
    testCollection, testid = context.split(',')
    knnfile = os.path.join(self.rootpath, testCollection, 'SimilarityIndex',
                           testCollection, self.knndir, testid[-2:],
                           '%s.txt' % testid)
    knn = readRankingResults(knnfile)
    knn = knn[:self.k]

    if self.noise > 1e-3:
        n = int(len(knn) * self.noise)
        # ``xrange`` does not exist on Python 3; ``range`` works on both
        # major versions and yields the same sample. The order of the two
        # sample calls is kept so the random stream is consumed identically.
        hits = random.sample(range(len(knn)), n)
        random_set = random.sample(self.imset, n)
        for idx, replacement in zip(hits, random_set):
            knn[idx] = (replacement, 1000)
    return knn
def process(options, collection, annotationName, runfile, newRunName):
    """Fuse several ranking runs into a new run by a weighted sum of scores.

    ``runfile`` lists the runs to combine: every non-blank line that does not
    start with ``#`` must hold exactly two whitespace-separated fields,
    ``<weight> <run>``. For each concept, the images ranked by the first
    listed run define the image set; ``readImageScoreTable`` supplies one row
    of scores per run, and the fused score of an image is the weighted sum of
    its column. Results are written to ``<simdir>/<newRunName>/<concept>.txt``
    sorted by descending (score, name).

    Concepts whose result file should be skipped (per ``checkToSkip``), or
    whose score file for the first run does not exist, are passed over.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``testset`` and
            ``torank``.
        collection: collection name.
        annotationName: annotation providing the concept list.
        runfile: path of the text file listing weights and runs.
        newRunName: name of the fused run (output sub-directory).

    Raises:
        ValueError: if a run-file line does not have exactly two fields or
            its weight is not a number.
        IndexError: if the run file lists no runs and a concept is processed.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    dataset = options.testset if options.testset else collection
    concepts = readConcepts(collection, annotationName, rootpath)
    simdir = os.path.join(rootpath, collection, "SimilarityIndex", dataset)

    models = []
    # The context manager closes the run file instead of leaking the handle.
    with open(runfile) as fin:
        for line in fin:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            weight, run = line.split()
            models.append((run, float(weight), 1))

    # The weights do not depend on the concept, so compute them once.
    weights = [model[1] for model in models]

    for concept in concepts:
        resultfile = os.path.join(simdir, newRunName, concept + ".txt")
        if checkToSkip(resultfile, overwrite):
            continue

        # The first listed run decides which images are fused.
        scorefile = os.path.join(simdir, models[0][0], concept + ".txt")
        if not os.path.exists(scorefile):
            print("%s does not exist. skip" % scorefile)
            continue

        ranklist = readRankingResults(scorefile)
        names = sorted([x[0] for x in ranklist])
        nr_of_images = len(names)
        name2index = dict(zip(names, range(nr_of_images)))
        print("%s %d" % (concept, nr_of_images))

        scoreTable = readImageScoreTable(concept, name2index, simdir, models,
                                         torank=options.torank)
        assert scoreTable.shape[1] == nr_of_images

        # (1 x models) * (models x images) -> one fused score per image.
        scores = np.matrix(weights) * scoreTable
        newranklist = [(names[k], float(scores[0, k]))
                       for k in range(nr_of_images)]
        newranklist.sort(key=lambda v: (v[1], v[0]), reverse=True)
        writeRankingResults(newranklist, resultfile)
def process(options, collection, annotationName, runfile, newRunName):
    """Combine the rankings of several runs into one weighted-sum run.

    Every non-blank, non-``#`` line of ``runfile`` must hold exactly two
    whitespace-separated fields, ``<weight> <run>``. Per concept, the images
    ranked by the first listed run form the image set,
    ``readImageScoreTable`` provides a row of scores per run, and each
    image's fused score is the weighted sum over its column. The fused
    ranking, sorted by descending (score, name), is written to
    ``<simdir>/<newRunName>/<concept>.txt``.

    A concept is passed over when ``checkToSkip`` says so for its result
    file, or when the first run has no score file for it.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``testset`` and
            ``torank``.
        collection: collection name.
        annotationName: annotation providing the concept list.
        runfile: path of the text file listing weights and runs.
        newRunName: name of the fused run (output sub-directory).

    Raises:
        ValueError: if a run-file line does not have exactly two fields or
            its weight is not a number.
        IndexError: if the run file lists no runs and a concept is processed.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    dataset = options.testset if options.testset else collection
    concepts = readConcepts(collection, annotationName, rootpath)
    simdir = os.path.join(rootpath, collection, "SimilarityIndex", dataset)

    # Read the run list inside a context manager so the file is closed
    # promptly rather than left to the garbage collector.
    with open(runfile) as fin:
        data = [x.strip() for x in fin
                if x.strip() and not x.strip().startswith("#")]

    models = []
    for line in data:
        weight, run = line.split()
        models.append((run, float(weight), 1))

    # Loop-invariant: the same weights apply to every concept.
    weights = [model[1] for model in models]

    for concept in concepts:
        resultfile = os.path.join(simdir, newRunName, concept + ".txt")
        if checkToSkip(resultfile, overwrite):
            continue

        # The first listed run decides which images are fused.
        scorefile = os.path.join(simdir, models[0][0], concept + ".txt")
        if not os.path.exists(scorefile):
            print("%s does not exist. skip" % scorefile)
            continue

        ranklist = readRankingResults(scorefile)
        names = sorted([x[0] for x in ranklist])
        nr_of_images = len(names)
        name2index = dict(zip(names, range(nr_of_images)))
        print('%s %d' % (concept, nr_of_images))

        scoreTable = readImageScoreTable(concept, name2index, simdir, models,
                                         torank=options.torank)
        assert scoreTable.shape[1] == nr_of_images

        # (1 x models) * (models x images) -> one fused score per image.
        fused = np.matrix(weights) * scoreTable
        scores = [float(fused[0, k]) for k in range(nr_of_images)]
        newranklist = list(zip(names, scores))
        newranklist.sort(key=lambda v: (v[1], v[0]), reverse=True)
        writeRankingResults(newranklist, resultfile)
def precompute_annotator(self, concept):
    """Grow the virtual annotator ``self.annotator`` for ``concept``.

    ``NegativeEngine.precompute_annotator`` runs first. The concept is then
    split on ``'-'`` and, for each sub-concept, the following tags are merged
    into ``self.annotator``: the sub-concept itself, the output of
    ``wn_expand`` and, if
    ``<ROOT_PATH>/<collection>/SimilarityIndex/ngd/<subconcept>.txt`` can be
    loaded, the first field of its top 100 entries.

    A failure while loading the ranked-tag file is reported via
    ``printError`` and does not abort the expansion.

    Args:
        concept: concept name, possibly several sub-concepts joined by '-'.
    """
    INFO = 'dataengine.%s.precompute_annotator' % self.__class__.__name__
    topn = 100
    NegativeEngine.precompute_annotator(self, concept)

    for subconcept in concept.split('-'):
        expandedTagSet = set([subconcept] + wn_expand(subconcept))
        try:
            datafile = os.path.join(ROOT_PATH, self.collection,
                                    'SimilarityIndex', 'ngd',
                                    '%s.txt' % subconcept)
            rankedtags = readRankingResults(datafile)
            top_tags = set(x[0] for x in rankedtags[:topn])
            expandedTagSet = expandedTagSet.union(top_tags)
        except Exception:
            # Still best effort, but no longer traps KeyboardInterrupt or
            # SystemExit as the bare ``except`` did.
            printError(INFO, 'failed to load ranktag file for %s' % subconcept)
        self.annotator = self.annotator.union(expandedTagSet)

    printStatus(
        INFO, 'precomputing the virtual annotator for %s: %d tags' %
        (concept, len(self.annotator)))
def GET(self):
    """Serve the search page for the ``query`` request parameter.

    The response dict starts with default values and is filled as follows:

    * no query: defaults are kept (``status`` 0);
    * all-digit query: treated as a request for one specific image;
    * otherwise: the query is used as a concept name. The ranking is the
      labeled image set when the ``tagrel`` parameter equals ``'0'``, else
      the contents of ``<simdir>/<query>.txt``. Hits without a label are
      coloured ``'white'``, hits with a positive label ``'Chartreuse'`` and
      all others ``'red'``; at most ``max_hits`` hits go into
      ``resp['tagrel']``.

    ``metric``, ``collection``, ``annotationName``, ``simdir``, ``rootpath``,
    ``scorer``, ``max_hits``, ``render`` and ``web`` are names defined
    outside this method.

    Returns:
        The result of ``render.index(resp)``.
    """
    params = web.input(query=None)
    resp = {'status': 0, 'hits': 0, 'random': [], 'tagrel': [],
            'metric': metric, 'perf': 0}

    if not params.query:
        return render.index(resp)

    resp['status'] = 1
    resp['query'] = params.query
    query = params.query.lower()

    if query.isdigit():  # request to view a specific image
        resp['hits'] = 1
        resp['tagrel'] = [{'id': query}]
        return render.index(resp)

    # Labels are optional; without them hits are white and perf stays 0.
    try:
        names, labels = readAnnotationsFrom(collection, annotationName, query)
        name2label = dict(zip(names, labels))
    except Exception:
        name2label = {}

    content = []
    # Best effort: any failure leaves the response as filled so far.
    # ``except Exception`` replaces the bare ``except`` so that
    # KeyboardInterrupt/SystemExit are not swallowed.
    try:
        if params.tagrel == '0':
            labeled = readLabeledImageSet(collection, query, rootpath=rootpath)
            ranklist = [(x, 0) for x in labeled]
        else:
            # NOTE(review): ``query`` is request input used to build a file
            # path; confirm it is validated or limited to known concepts.
            simfile = os.path.join(simdir, '%s.txt' % query)
            ranklist = readRankingResults(simfile)
        resp['hits'] = len(ranklist)

        for name, score in ranklist:
            if name not in name2label:
                color = 'white'
            elif name2label[name] > 0:
                color = 'Chartreuse'
            else:
                color = 'red'
            content.append({'id': name, 'color': color})

        if name2label:
            known = [name2label[x[0]] for x in ranklist if x[0] in name2label]
            resp['perf'] = scorer.score(known)
        else:
            resp['perf'] = 0
        resp['tagrel'] = content[:max_hits]
    except Exception:
        pass

    # Render the populated response; without this return the search result
    # built above would never reach the client.
    return render.index(resp)
assert( rankMethod.startswith('tagged,lemm/%s'%collection) ) newAnnotationTemplate = annotationName[:-4] + '.' + posName + str(nr_pos) + ('.random%d'%nr_neg) + '.%d.txt' concepts = readConcepts(collection, annotationName, rootpath) simdir = os.path.join(rootpath, collection, 'SimilarityIndex', collection, rankMethod) scriptfile = os.path.join(rootpath,collection,'annotationfiles', annotationName[:-4] + '.' + posName + str(nr_pos) + ('.random%d'%nr_neg) + '.0-%d.txt'%(nr_neg_bags-1)) makedirsforfile(scriptfile) fout = open(scriptfile,'w') fout.write('\n'.join([newAnnotationTemplate%t for t in range(nr_neg_bags)]) + '\n') fout.close() for concept in concepts: simfile = os.path.join(simdir, '%s.txt' % concept) ranklist = readRankingResults(simfile) pos_bag = [x[0] for x in ranklist[:nr_pos]] names, labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath) negativePool = [x[0] for x in zip(names,labels) if x[1] < 0] for t in range(nr_neg_bags): newAnnotationName = newAnnotationTemplate % t resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt'%concept) if checkToSkip(resultfile, overwrite): continue true_nr_neg = max(500, len(pos_bag)*neg_pos_ratio) neg_bag = random.sample(negativePool, true_nr_neg) #len(pos_bag)*neg_pos_ratio) assert(len(set(pos_bag).intersection(set(neg_bag))) == 0) printStatus(INFO, "anno(%s,%d) %d pos %d neg -> %s" % (concept,t,len(pos_bag),len(neg_bag),resultfile)) writeAnnotations(pos_bag + neg_bag, [1]*len(pos_bag) + [-1]*len(neg_bag), resultfile)
#posName = 'ccgd' + str(numPos) #tagrelMethod = 'flickr1m/ccgd,knn,1000' concepts = readConcepts(collection, sourceAnnotationName % 0, rootpath) holdoutfile = os.path.join(rootpath, collection, "ImageSets", "holdout.txt") holdoutSet = set(map(str.strip, open(holdoutfile).readlines())) print('%s holdout %d' % (collection, len(holdoutSet))) for concept in concepts: simfile = os.path.join(rootpath, collection, 'SimilarityIndex', collection, 'tagged,lemm', tagrelMethod, '%s.txt' % concept) searchresults = readRankingResults(simfile) searchresults = [x for x in searchresults if x[0] not in holdoutSet] positiveSet = [x[0] for x in searchresults[:numPos]] for t in range(T): newAnnotationName = sourceAnnotationName % t newAnnotationName = newAnnotationName.replace( 'rand%d.0' % numPos, posName) names, labels = readAnnotationsFrom(collection, sourceAnnotationName % t, concept, rootpath) negativeSet = [x[0] for x in zip(names, labels) if -1 == x[1]] renamed = positiveSet + negativeSet relabeled = [1] * len(positiveSet) + [-1] * len(negativeSet) print('[%s] %s +%d, -%d -> %s' %
#tagrelMethod = 'textual' posName = 'clickcount' + str(numPos) tagrelMethod = 'clickcount' #posName = 'ccgd' + str(numPos) #tagrelMethod = 'flickr1m/ccgd,knn,1000' concepts = readConcepts(collection, sourceAnnotationName%0, rootpath) holdoutfile = os.path.join(rootpath, collection, "ImageSets", "holdout.txt") holdoutSet = set(map(str.strip, open(holdoutfile).readlines())) print ('%s holdout %d' % (collection,len(holdoutSet))) for concept in concepts: simfile = os.path.join(rootpath, collection, 'SimilarityIndex', collection, 'tagged,lemm', tagrelMethod, '%s.txt' % concept) searchresults = readRankingResults(simfile) searchresults = [x for x in searchresults if x[0] not in holdoutSet] positiveSet = [x[0] for x in searchresults[:numPos]] for t in range(T): newAnnotationName = sourceAnnotationName % t newAnnotationName = newAnnotationName.replace('rand%d.0'%numPos, posName) names,labels = readAnnotationsFrom(collection,sourceAnnotationName%t,concept,rootpath) negativeSet = [x[0] for x in zip(names,labels) if -1 == x[1]] renamed = positiveSet + negativeSet relabeled = [1] * len(positiveSet) + [-1] * len(negativeSet) print ('[%s] %s +%d, -%d -> %s' % (concept,sourceAnnotationName % t,len(positiveSet),len(negativeSet),newAnnotationName)) writeAnnotationsTo(renamed, relabeled, collection, newAnnotationName, concept, rootpath) for t in range(T):
rankMethod) scriptfile = os.path.join( rootpath, collection, 'annotationfiles', annotationName[:-4] + '.' + posName + str(nr_pos) + ('.random%d' % nr_neg) + '.0-%d.txt' % (nr_neg_bags - 1)) makedirsforfile(scriptfile) fout = open(scriptfile, 'w') fout.write( '\n'.join([newAnnotationTemplate % t for t in range(nr_neg_bags)]) + '\n') fout.close() for concept in concepts: simfile = os.path.join(simdir, '%s.txt' % concept) ranklist = readRankingResults(simfile) pos_bag = [x[0] for x in ranklist[:nr_pos]] names, labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath) negativePool = [x[0] for x in zip(names, labels) if x[1] < 0] for t in range(nr_neg_bags): newAnnotationName = newAnnotationTemplate % t resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt' % concept) if checkToSkip(resultfile, overwrite): continue
def precompute(self, concept):
    """Prepare the candidate positives of ``concept``.

    The ranking stored in ``<self.datadir>/<concept>.txt`` is read with
    ``readRankingResults``; the first field of each entry becomes an element
    of ``self.candidateset`` (file order is kept) and ``self.target`` records
    the concept.
    """
    message = "[%s] precomputing candidate positive examples for %s" % (self.name, concept)
    print(message)

    source = os.path.join(self.datadir, '%s.txt' % concept)
    ranked = readRankingResults(source)
    self.candidateset = [item[0] for item in ranked]
    self.target = concept