Exemplo n.º 1
0
def process(options, collection, annotationName, simdir, resultfile):
    """Gather per-concept ranking scores into one matrix and pickle it.

    For every concept returned by ``readConcepts`` the file
    ``<simdir>/<concept>.txt`` is read with ``readRankingResults`` and each
    ``(image, score)`` pair is stored in a ``(num_images, num_concepts)``
    array. The pickled result is a dict with the keys ``'concepts'``,
    ``'id_images'`` (image ids converted with ``int``) and ``'scores'``.

    Args:
        options: object providing ``rootpath`` and ``overwrite`` attributes.
        collection: collection name, passed to ``readConcepts`` and (twice)
            to ``readImageSet``.
        annotationName: annotation name passed to ``readConcepts``.
        simdir: directory holding one ``<concept>.txt`` ranking file each.
        resultfile: path of the pickle file to write.

    Returns:
        0 when ``checkToSkip`` says the result should be skipped, otherwise
        ``None`` after the file has been written.

    Raises:
        KeyError: if a ranking file mentions an image that is not in the
            image set.
        ValueError: if an image id cannot be converted with ``int``.
    """
    rootpath = options.rootpath

    if checkToSkip(resultfile, options.overwrite):
        return 0

    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    concept_num = len(concepts)

    id_images = readImageSet(collection, collection, rootpath)
    image_num = len(id_images)
    im2index = dict(zip(id_images, range(image_num)))
    print('%d instances, %d concepts to dump -> %s' % (image_num, concept_num, resultfile))

    # Cells that no ranking file fills keep the placeholder value -1e4.
    scores = np.zeros((image_num, concept_num)) - 1e4

    for c_id, concept in enumerate(concepts):
        simfile = os.path.join(simdir, '%s.txt' % concept)
        for im, score in readRankingResults(simfile):
            scores[im2index[im], c_id] = score

    # Build the payload before touching the file system so that a failing
    # int() conversion cannot leave an empty result file behind. A real list
    # is required: on Python 3 ``map`` returns an iterator that cannot be
    # pickled.
    payload = {
        'concepts': concepts,
        'id_images': [int(x) for x in id_images],
        'scores': scores,
    }

    makedirsforfile(resultfile)
    with open(resultfile, 'wb') as output:
        pickle.dump(payload, output, -1)
Exemplo n.º 2
0
def process(options, collection, annotationName, simdir, resultfile):
    """Dump the ranking scores of all concepts as a single pickled matrix.

    Reads ``<simdir>/<concept>.txt`` with ``readRankingResults`` for each
    concept from ``readConcepts`` and writes every ``(image, score)`` pair
    into a ``(num_images, num_concepts)`` array. The array is pickled to
    ``resultfile`` together with the concept list and the image ids
    (converted with ``int``), under the keys ``'scores'``, ``'concepts'`` and
    ``'id_images'``.

    Args:
        options: object providing ``rootpath`` and ``overwrite`` attributes.
        collection: collection name, passed to ``readConcepts`` and (twice)
            to ``readImageSet``.
        annotationName: annotation name passed to ``readConcepts``.
        simdir: directory containing the per-concept ranking files.
        resultfile: destination path of the pickle file.

    Returns:
        0 if ``checkToSkip`` reports that the result should be skipped,
        ``None`` otherwise.

    Raises:
        KeyError: if a ranked image is missing from the image set.
        ValueError: if an image id cannot be converted with ``int``.
    """
    rootpath = options.rootpath

    if checkToSkip(resultfile, options.overwrite):
        return 0

    concepts = readConcepts(collection, annotationName, rootpath=rootpath)
    concept_num = len(concepts)

    id_images = readImageSet(collection, collection, rootpath)
    image_num = len(id_images)
    im2index = dict(zip(id_images, range(image_num)))
    print('%d instances, %d concepts to dump -> %s' %
          (image_num, concept_num, resultfile))

    # Entries never overwritten below keep the placeholder value -1e4.
    scores = np.zeros((image_num, concept_num)) - 1e4

    for c_id, concept in enumerate(concepts):
        simfile = os.path.join(simdir, '%s.txt' % concept)
        for im, score in readRankingResults(simfile):
            scores[im2index[im], c_id] = score

    # Materialise the ids as a list: a Python 3 ``map`` object is not
    # picklable. Doing it before opening the file also avoids leaving a
    # truncated result behind when a conversion fails.
    payload = {
        'concepts': concepts,
        'id_images': [int(x) for x in id_images],
        'scores': scores,
    }

    makedirsforfile(resultfile)
    with open(resultfile, 'wb') as output:
        pickle.dump(payload, output, -1)
Exemplo n.º 3
0
 def precompute(self, concept):
     """Cache the candidate positive examples for ``concept``.

     Reads ``<self.datadir>/<concept>.txt`` with ``readRankingResults`` and
     stores the first field of every entry in ``self.candidateset``; the
     concept itself is remembered in ``self.target``.
     """
     message = "[%s] precomputing candidate positive examples for %s" % (
         self.name, concept)
     print(message)
     ranking_path = os.path.join(self.datadir, '%s.txt' % concept)
     self.candidateset = [entry[0] for entry in readRankingResults(ranking_path)]
     self.target = concept
Exemplo n.º 4
0
 def _get_neighbors(self, content, context):
     """Return the first ``self.k`` neighbours of a test item, optionally noised.

     ``context`` is a ``"<collection>,<id>"`` string. The neighbour list is
     read with ``readRankingResults`` from
     ``<rootpath>/<collection>/SimilarityIndex/<collection>/<knndir>/<last two
     chars of id>/<id>.txt``.

     When ``self.noise`` exceeds 1e-3, a fraction ``self.noise`` of the kept
     entries (rounded down) is overwritten at random positions with
     ``(name, 1000)`` tuples whose names are sampled from ``self.imset``.

     Args:
         content: unused by this method.
         context: ``"<collection>,<id>"`` identifying the test item.

     Returns:
         The (possibly noised) list of neighbour entries.

     Raises:
         ValueError: if ``context`` does not split into exactly two parts, or
             if ``random.sample`` is asked for more items than are available.
     """
     testCollection, testid = context.split(',')
     knnfile = os.path.join(self.rootpath, testCollection, 'SimilarityIndex',
                            testCollection, self.knndir, testid[-2:],
                            '%s.txt' % testid)
     knn = readRankingResults(knnfile)[:self.k]
     if self.noise > 1e-3:
         n = int(len(knn) * self.noise)
         # ``range`` instead of ``xrange``: works on Python 2 and 3 alike.
         hits = random.sample(range(len(knn)), n)
         random_set = random.sample(self.imset, n)
         for idx, noisy_name in zip(hits, random_set):
             knn[idx] = (noisy_name, 1000)
     return knn
Exemplo n.º 5
0
    def GET(self):
        """Build the response dict for an optional ``query`` request parameter.

        A purely numeric query is treated as a request for one specific image
        and is rendered immediately through ``render.index``. For any other
        query the ranking is loaded (labeled image set when the ``tagrel``
        parameter equals ``'0'``, otherwise ``<simdir>/<query>.txt``), every
        hit is colour-coded from the annotations of the query (``'white'``
        when unlabeled, ``'Chartreuse'`` for a positive label, ``'red'``
        otherwise) and ``resp['perf']`` is computed with ``scorer.score``
        over the labeled hits.

        Both loading steps are best effort: any ordinary exception leaves the
        corresponding part of ``resp`` at the values set so far.
        """
        # ``params`` rather than ``input`` so the builtin is not shadowed.
        params = web.input(query=None)
        resp = {
            'status': 0,
            'hits': 0,
            'random': [],
            'tagrel': [],
            'metric': metric,
            'perf': 0
        }

        if params.query:
            resp['status'] = 1
            resp['query'] = params.query
            query = params.query.lower()

            if query.isdigit():  # request to view a specific image
                resp['hits'] = 1
                resp['tagrel'] = [{'id': query}]
                return render.index(resp)

            try:
                names, labels = readAnnotationsFrom(collection, annotationName,
                                                    query)
                name2label = dict(zip(names, labels))
            except Exception:
                # No usable annotations for this query: treat every hit as
                # unlabeled.
                name2label = {}

            content = []
            try:
                if params.tagrel == '0':
                    labeled = readLabeledImageSet(collection,
                                                  query,
                                                  rootpath=rootpath)
                    ranklist = [(x, 0) for x in labeled]
                else:
                    simfile = os.path.join(simdir, '%s.txt' % query)
                    ranklist = readRankingResults(simfile)
                resp['hits'] = len(ranklist)
                for name, _score in ranklist:
                    if name not in name2label:
                        color = 'white'
                    elif name2label[name] > 0:
                        color = 'Chartreuse'
                    else:
                        color = 'red'
                    content.append({'id': name, 'color': color})
                resp['perf'] = 0 if not name2label else scorer.score(
                    [name2label[x[0]] for x in ranklist if x[0] in name2label])
                resp['tagrel'] = content[:max_hits]
            except Exception:
                # Deliberately best effort (this also covers a missing
                # ``tagrel`` parameter): keep whatever was filled in so far.
                # Unlike a bare ``except``, KeyboardInterrupt and SystemExit
                # still propagate.
                pass
        # NOTE(review): the visible code ends here without rendering ``resp``
        # for non-numeric or empty queries - confirm whether a final
        # ``return render.index(resp)`` is expected to follow.
Exemplo n.º 6
0
 def precompute_annotator(self, concept):
     """Extend ``self.annotator`` with tags related to ``concept``.

     After delegating to ``NegativeEngine.precompute_annotator``, the concept
     is split on ``'-'``. Each sub-concept contributes itself, the result of
     ``wn_expand`` and the first field of the top 100 entries of
     ``<ROOT_PATH>/<collection>/SimilarityIndex/ngd/<subconcept>.txt``.

     Loading the ranking file is best effort: on failure an error is reported
     through ``printError`` and only the sub-concept plus its ``wn_expand``
     result are added.
     """
     INFO = 'dataengine.%s.precompute_annotator' % self.__class__.__name__
     topn = 100
     NegativeEngine.precompute_annotator(self, concept)

     for subconcept in concept.split('-'):
         expandedTagSet = set([subconcept] + wn_expand(subconcept))
         try:
             datafile = os.path.join(ROOT_PATH, self.collection,
                                     'SimilarityIndex', 'ngd',
                                     '%s.txt' % subconcept)
             rankedtags = readRankingResults(datafile)
             expandedTagSet = expandedTagSet.union(
                 x[0] for x in rankedtags[:topn])
         except Exception:
             # ``Exception`` rather than a bare ``except`` so that
             # KeyboardInterrupt / SystemExit are not swallowed.
             printError(INFO, 'failed to load ranktag file for %s' % subconcept)
         self.annotator = self.annotator.union(expandedTagSet)
     printStatus(INFO, 'precomputing the virtual annotator for %s: %d tags' % (concept, len(self.annotator)))
Exemplo n.º 7
0
 def _get_neighbors(self, content, context):
     """Load the neighbour list of a test item and optionally inject noise.

     ``context`` has the form ``"<collection>,<id>"``. The list is read with
     ``readRankingResults`` from
     ``<rootpath>/<collection>/SimilarityIndex/<collection>/<knndir>/<last two
     chars of id>/<id>.txt`` and cut to its first ``self.k`` entries.

     If ``self.noise`` is larger than 1e-3, ``int(len(knn) * self.noise)``
     randomly chosen positions are replaced by ``(name, 1000)`` tuples, with
     the names sampled from ``self.imset``.

     Args:
         content: unused by this method.
         context: ``"<collection>,<id>"`` identifying the test item.

     Returns:
         The (possibly noised) list of neighbour entries.

     Raises:
         ValueError: if ``context`` does not split into exactly two parts, or
             if ``random.sample`` is asked for more items than are available.
     """
     testCollection, testid = context.split(',')
     knnfile = os.path.join(self.rootpath, testCollection,
                            'SimilarityIndex', testCollection, self.knndir,
                            testid[-2:], '%s.txt' % testid)
     knn = readRankingResults(knnfile)[:self.k]
     if self.noise > 1e-3:
         n = int(len(knn) * self.noise)
         # ``xrange`` does not exist on Python 3; ``range`` works on both.
         hits = random.sample(range(len(knn)), n)
         random_set = random.sample(self.imset, n)
         for idx, noisy_name in zip(hits, random_set):
             knn[idx] = (noisy_name, 1000)
     return knn
Exemplo n.º 8
0
def process(options, collection, annotationName, runfile, newRunName):
    """Fuse the ranking scores of several runs by a weighted sum.

    ``runfile`` lists one model per line as ``<weight> <run>``; blank lines
    and lines starting with ``#`` are ignored. For every concept the image
    names are taken from the first model's ranking file, the per-model
    scores are fetched with ``readImageScoreTable`` and combined with the
    weights. The result, sorted by descending ``(score, name)``, is written
    to ``<simdir>/<newRunName>/<concept>.txt``.

    Concepts are skipped when ``checkToSkip`` says so or when the first
    model has no ranking file for them.

    Args:
        options: object providing ``rootpath``, ``overwrite``, ``testset``
            and ``torank`` attributes.
        collection: collection name; also used as the data set when
            ``options.testset`` is empty.
        annotationName: annotation name passed to ``readConcepts``.
        runfile: path of the text file listing weights and runs.
        newRunName: name of the sub-directory receiving the fused rankings.

    Raises:
        ValueError: if a line of ``runfile`` does not consist of exactly two
            whitespace-separated fields or the weight is not a float.
        IndexError: if ``runfile`` lists no model and a concept has to be
            processed.
        AssertionError: if the score table does not have one column per
            image.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite

    dataset = options.testset if options.testset else collection

    concepts = readConcepts(collection, annotationName, rootpath)
    simdir = os.path.join(rootpath, collection, "SimilarityIndex", dataset)

    models = []
    # ``with`` closes the run file even when a line fails to parse.
    with open(runfile) as fin:
        for line in fin:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            weight, run = line.split()
            # NOTE(review): the meaning of the constant third field is not
            # visible here; it is consumed by readImageScoreTable.
            models.append((run, float(weight), 1))

    # The weight row vector is the same for every concept.
    weight_row = np.matrix([model[1] for model in models])

    for concept in concepts:
        resultfile = os.path.join(simdir, newRunName, concept + ".txt")
        if checkToSkip(resultfile, overwrite):
            continue

        scorefile = os.path.join(simdir, models[0][0], concept + ".txt")
        if not os.path.exists(scorefile):
            print("%s does not exist. skip" % scorefile)
            continue

        ranklist = readRankingResults(scorefile)
        names = sorted(x[0] for x in ranklist)

        nr_of_images = len(names)
        name2index = dict(zip(names, range(nr_of_images)))

        print("%s %d" % (concept, nr_of_images))

        scoreTable = readImageScoreTable(concept, name2index, simdir, models, torank=options.torank)
        assert scoreTable.shape[1] == nr_of_images

        fused = weight_row * scoreTable
        scores = [float(fused[0, k]) for k in range(nr_of_images)]

        newranklist = list(zip(names, scores))
        newranklist.sort(key=lambda v: (v[1], v[0]), reverse=True)

        writeRankingResults(newranklist, resultfile)
Exemplo n.º 9
0
def process(options, collection, annotationName, runfile, newRunName):
    """Combine several runs into a new one through a weighted score sum.

    Each non-empty, non-``#`` line of ``runfile`` must read
    ``<weight> <run>``. For every concept, the image names come from the
    ranking file of the first listed run; ``readImageScoreTable`` supplies
    the per-run scores, which are multiplied by the weights and summed. The
    fused ranking is sorted by ``(score, name)`` in descending order and
    written to ``<simdir>/<newRunName>/<concept>.txt``.

    A concept is skipped when ``checkToSkip`` requests it or when the first
    run has no ranking file for it.

    Args:
        options: object providing ``rootpath``, ``overwrite``, ``testset``
            and ``torank`` attributes.
        collection: collection name; also the data set name when
            ``options.testset`` is empty.
        annotationName: annotation name passed to ``readConcepts``.
        runfile: path of the text file listing weights and runs.
        newRunName: name of the sub-directory receiving the fused rankings.

    Raises:
        ValueError: if a line of ``runfile`` does not hold exactly two
            whitespace-separated fields or the weight is not a float.
        IndexError: if ``runfile`` lists no run and a concept has to be
            processed.
        AssertionError: if the score table does not have one column per
            image.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite

    dataset = options.testset if options.testset else collection

    concepts = readConcepts(collection, annotationName, rootpath)
    simdir = os.path.join(rootpath, collection, "SimilarityIndex", dataset)

    models = []
    # Context manager so the run file handle is released deterministically.
    with open(runfile) as fin:
        for line in fin:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            weight, run = line.split()
            # NOTE(review): purpose of the constant ``1`` is not visible in
            # this function; it is passed on to readImageScoreTable.
            models.append((run, float(weight), 1))

    # Loop-invariant: one weight per run, as a 1 x len(models) row vector.
    weight_row = np.matrix([model[1] for model in models])

    for concept in concepts:
        resultfile = os.path.join(simdir, newRunName, concept + ".txt")
        if checkToSkip(resultfile, overwrite):
            continue

        scorefile = os.path.join(simdir, models[0][0], concept + ".txt")
        if not os.path.exists(scorefile):
            print ("%s does not exist. skip" % scorefile)
            continue

        ranklist = readRankingResults(scorefile)
        names = sorted(x[0] for x in ranklist)

        nr_of_images = len(names)
        name2index = dict(zip(names, range(nr_of_images)))

        print ('%s %d' % (concept, nr_of_images))

        scoreTable = readImageScoreTable(concept, name2index, simdir, models, torank=options.torank)
        assert(scoreTable.shape[1] == nr_of_images)

        fused = weight_row * scoreTable
        scores = [float(fused[0, k]) for k in range(nr_of_images)]

        newranklist = list(zip(names, scores))
        newranklist.sort(key=lambda v: (v[1], v[0]), reverse=True)

        writeRankingResults(newranklist, resultfile)
Exemplo n.º 10
0
    def precompute_annotator(self, concept):
        """Grow ``self.annotator`` with the tags related to ``concept``.

        ``NegativeEngine.precompute_annotator`` runs first. Then, for each
        ``'-'``-separated part of the concept, the part itself, its
        ``wn_expand`` result and the first field of the top 100 entries of
        ``<ROOT_PATH>/<collection>/SimilarityIndex/ngd/<subconcept>.txt`` are
        merged into ``self.annotator``.

        Reading the ranking file is best effort: a failure is reported with
        ``printError`` and the ranked tags of that part are simply left out.
        """
        INFO = 'dataengine.%s.precompute_annotator' % self.__class__.__name__
        topn = 100
        NegativeEngine.precompute_annotator(self, concept)

        for subconcept in concept.split('-'):
            expandedTagSet = set([subconcept] + wn_expand(subconcept))
            try:
                datafile = os.path.join(ROOT_PATH, self.collection,
                                        'SimilarityIndex', 'ngd',
                                        '%s.txt' % subconcept)
                rankedtags = readRankingResults(datafile)
                expandedTagSet = expandedTagSet.union(
                    x[0] for x in rankedtags[:topn])
            except Exception:
                # Narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                printError(INFO,
                           'failed to load ranktag file for %s' % subconcept)
            self.annotator = self.annotator.union(expandedTagSet)
        printStatus(
            INFO, 'precomputing the virtual annotator for %s: %d tags' %
            (concept, len(self.annotator)))
Exemplo n.º 11
0
    def GET(self):
        """Fill the response dict for an optional ``query`` request parameter.

        A query made only of digits is a request for one specific image and
        is rendered right away with ``render.index``. Otherwise the ranking
        is loaded - from the labeled image set when the ``tagrel`` parameter
        is ``'0'``, else from ``<simdir>/<query>.txt`` - and each hit gets a
        colour based on the annotations of the query: ``'white'`` if the
        image is unlabeled, ``'Chartreuse'`` if its label is positive,
        ``'red'`` otherwise. ``resp['perf']`` is ``scorer.score`` applied to
        the labels of the labeled hits (0 without annotations).

        Both loading steps are best effort: an ordinary exception leaves the
        affected part of ``resp`` at whatever was set so far.
        """
        # Local renamed from ``input`` to avoid shadowing the builtin.
        params = web.input(query=None)
        resp = {'status':0, 'hits':0, 'random':[], 'tagrel':[], 'metric':metric, 'perf':0}

        if params.query:
            resp['status'] = 1
            resp['query'] = params.query
            query = params.query.lower()

            if query.isdigit(): # request to view a specific image
                resp['hits'] = 1
                resp['tagrel'] = [{'id':query}]
                return render.index(resp)

            try:
                names, labels = readAnnotationsFrom(collection, annotationName, query)
                name2label = dict(zip(names, labels))
            except Exception:
                # Annotations unavailable: every hit counts as unlabeled.
                name2label = {}

            content = []
            try:
                if params.tagrel == '0':
                    labeled = readLabeledImageSet(collection, query, rootpath=rootpath)
                    ranklist = [(x, 0) for x in labeled]
                else:
                    simfile = os.path.join(simdir, '%s.txt' % query)
                    ranklist = readRankingResults(simfile)
                resp['hits'] = len(ranklist)
                for name, _score in ranklist:
                    if name not in name2label:
                        color = 'white'
                    elif name2label[name] > 0:
                        color = 'Chartreuse'
                    else:
                        color = 'red'
                    content.append({'id':name, 'color':color})
                resp['perf'] = 0 if not name2label else scorer.score([name2label[x[0]] for x in ranklist if x[0] in name2label])
                resp['tagrel'] = content[:max_hits]
            except Exception:
                # Intentionally best effort (also covers a missing ``tagrel``
                # parameter). ``Exception`` instead of a bare ``except`` lets
                # KeyboardInterrupt / SystemExit through.
                pass
        # NOTE(review): nothing is rendered for non-numeric or empty queries
        # in the visible code - confirm whether a trailing
        # ``return render.index(resp)`` is expected to follow.
Exemplo n.º 12
0
    assert( rankMethod.startswith('tagged,lemm/%s'%collection) )

    newAnnotationTemplate = annotationName[:-4] + '.' + posName + str(nr_pos) + ('.random%d'%nr_neg) + '.%d.txt'
    concepts = readConcepts(collection, annotationName, rootpath)    
    simdir = os.path.join(rootpath, collection, 'SimilarityIndex', collection, rankMethod)

    scriptfile = os.path.join(rootpath,collection,'annotationfiles', annotationName[:-4] + '.' + posName + str(nr_pos) + ('.random%d'%nr_neg) + '.0-%d.txt'%(nr_neg_bags-1))
    makedirsforfile(scriptfile)
    fout = open(scriptfile,'w')
    fout.write('\n'.join([newAnnotationTemplate%t for t in range(nr_neg_bags)]) + '\n')
    fout.close()


    for concept in concepts:
        simfile = os.path.join(simdir, '%s.txt' % concept)
        ranklist = readRankingResults(simfile)
        pos_bag = [x[0] for x in ranklist[:nr_pos]]
        names, labels = readAnnotationsFrom(collection, annotationName, concept, skip_0=True, rootpath=rootpath)
        negativePool = [x[0] for x in zip(names,labels) if x[1] < 0]

        for t in range(nr_neg_bags):
            newAnnotationName = newAnnotationTemplate % t
            resultfile = os.path.join(rootpath, collection, 'Annotations', 'Image', newAnnotationName, '%s.txt'%concept)
            if checkToSkip(resultfile, overwrite):
                continue
            true_nr_neg = max(500, len(pos_bag)*neg_pos_ratio)
            neg_bag = random.sample(negativePool, true_nr_neg) #len(pos_bag)*neg_pos_ratio)
            assert(len(set(pos_bag).intersection(set(neg_bag))) == 0)
            printStatus(INFO, "anno(%s,%d) %d pos %d neg -> %s" % (concept,t,len(pos_bag),len(neg_bag),resultfile))
            writeAnnotations(pos_bag + neg_bag, [1]*len(pos_bag) + [-1]*len(neg_bag), resultfile)
Exemplo n.º 13
0
    #posName = 'ccgd' + str(numPos)
    #tagrelMethod = 'flickr1m/ccgd,knn,1000'

    concepts = readConcepts(collection, sourceAnnotationName % 0, rootpath)

    holdoutfile = os.path.join(rootpath, collection, "ImageSets",
                               "holdout.txt")
    holdoutSet = set(map(str.strip, open(holdoutfile).readlines()))
    print('%s holdout %d' % (collection, len(holdoutSet)))

    for concept in concepts:
        simfile = os.path.join(rootpath, collection, 'SimilarityIndex',
                               collection, 'tagged,lemm', tagrelMethod,
                               '%s.txt' % concept)
        searchresults = readRankingResults(simfile)
        searchresults = [x for x in searchresults if x[0] not in holdoutSet]
        positiveSet = [x[0] for x in searchresults[:numPos]]

        for t in range(T):
            newAnnotationName = sourceAnnotationName % t
            newAnnotationName = newAnnotationName.replace(
                'rand%d.0' % numPos, posName)
            names, labels = readAnnotationsFrom(collection,
                                                sourceAnnotationName % t,
                                                concept, rootpath)

            negativeSet = [x[0] for x in zip(names, labels) if -1 == x[1]]
            renamed = positiveSet + negativeSet
            relabeled = [1] * len(positiveSet) + [-1] * len(negativeSet)
            print('[%s] %s +%d, -%d -> %s' %
Exemplo n.º 14
0
    #tagrelMethod = 'textual'
    posName = 'clickcount' + str(numPos)
    tagrelMethod = 'clickcount'
    
    #posName = 'ccgd' + str(numPos)
    #tagrelMethod = 'flickr1m/ccgd,knn,1000'

    concepts = readConcepts(collection, sourceAnnotationName%0, rootpath)

    holdoutfile = os.path.join(rootpath, collection, "ImageSets", "holdout.txt") 
    holdoutSet = set(map(str.strip, open(holdoutfile).readlines()))
    print ('%s holdout %d' % (collection,len(holdoutSet)))
 
    for concept in concepts:
        simfile = os.path.join(rootpath, collection, 'SimilarityIndex', collection, 'tagged,lemm', tagrelMethod, '%s.txt' % concept)
        searchresults = readRankingResults(simfile)
        searchresults = [x for x in searchresults if x[0] not in holdoutSet]
        positiveSet = [x[0] for x in searchresults[:numPos]]
                
        for t in range(T):
            newAnnotationName = sourceAnnotationName % t
            newAnnotationName = newAnnotationName.replace('rand%d.0'%numPos, posName)
            names,labels = readAnnotationsFrom(collection,sourceAnnotationName%t,concept,rootpath)
            
            negativeSet = [x[0] for x in zip(names,labels) if -1 == x[1]]
            renamed = positiveSet + negativeSet
            relabeled = [1] * len(positiveSet) + [-1] * len(negativeSet)
            print ('[%s] %s +%d, -%d -> %s' % (concept,sourceAnnotationName % t,len(positiveSet),len(negativeSet),newAnnotationName)) 
            writeAnnotationsTo(renamed, relabeled, collection, newAnnotationName, concept, rootpath)
            
    for t in range(T):
Exemplo n.º 15
0
                          rankMethod)

    scriptfile = os.path.join(
        rootpath, collection, 'annotationfiles',
        annotationName[:-4] + '.' + posName + str(nr_pos) +
        ('.random%d' % nr_neg) + '.0-%d.txt' % (nr_neg_bags - 1))
    makedirsforfile(scriptfile)
    fout = open(scriptfile, 'w')
    fout.write(
        '\n'.join([newAnnotationTemplate % t
                   for t in range(nr_neg_bags)]) + '\n')
    fout.close()

    for concept in concepts:
        simfile = os.path.join(simdir, '%s.txt' % concept)
        ranklist = readRankingResults(simfile)
        pos_bag = [x[0] for x in ranklist[:nr_pos]]
        names, labels = readAnnotationsFrom(collection,
                                            annotationName,
                                            concept,
                                            skip_0=True,
                                            rootpath=rootpath)
        negativePool = [x[0] for x in zip(names, labels) if x[1] < 0]

        for t in range(nr_neg_bags):
            newAnnotationName = newAnnotationTemplate % t
            resultfile = os.path.join(rootpath, collection, 'Annotations',
                                      'Image', newAnnotationName,
                                      '%s.txt' % concept)
            if checkToSkip(resultfile, overwrite):
                continue
Exemplo n.º 16
0
 def precompute(self, concept):
     """Load and cache the candidate positive examples of ``concept``.

     The ranking file ``<self.datadir>/<concept>.txt`` is read with
     ``readRankingResults``; the first field of each entry is kept in
     ``self.candidateset`` and the concept is stored in ``self.target``.

     The body is indented with spaces only (the original mixed tabs and
     spaces on every line).
     """
     print ("[%s] precomputing candidate positive examples for %s" % (self.name, concept))
     datafile = os.path.join(self.datadir, '%s.txt' % concept)
     ranklist = readRankingResults(datafile)
     self.candidateset = [x[0] for x in ranklist]
     self.target = concept