Пример #1
0
def evaluateSearchEngines(searchers,
                          collection,
                          annotationName,
                          metric,
                          rootpath=ROOT_PATH):
    """Score every searcher on every concept and print a result table.

    For each concept returned by ``readConcepts`` the annotated labels are
    loaded with ``readAnnotationsFrom``.  The items returned by a searcher
    for that concept are mapped to their labels, in the order the searcher
    returned them (items without an annotation are dropped), and the label
    list is scored by the scorer obtained from ``getScorer(metric)``.

    Args:
        searchers: sequence of objects exposing ``scoreCollection(concept)``
            that yields ``(name, score)`` pairs.
        collection: collection identifier forwarded to the readers.
        annotationName: annotation identifier forwarded to the readers.
        metric: metric name given to ``getScorer``; also used to label the
            printed mean row.
        rootpath: data root forwarded to the readers.

    Returns:
        ``(concepts, results)`` where ``results`` is a numpy array holding
        one row per concept and one column per searcher.
    """
    scorer = getScorer(metric)
    concepts = readConcepts(collection, annotationName, rootpath)
    results = np.zeros((len(concepts), len(searchers)))

    for row, concept in enumerate(concepts):
        names, labels = readAnnotationsFrom(collection, annotationName,
                                            concept, rootpath)
        name2label = dict(zip(names, labels))

        for col, searcher in enumerate(searchers):
            hits = searcher.scoreCollection(concept)
            # Keep the searcher's order; skip items that have no annotation.
            sorted_labels = [
                name2label[name] for (name, _score) in hits
                if name in name2label
            ]
            results[row, col] = scorer.score(sorted_labels)

    # A single pre-formatted string is printed per row so the output is the
    # same whether ``print`` is a statement or a function.
    for row, concept in enumerate(concepts):
        row_text = ' '.join([niceNumber(x, 3) for x in results[row, :]])
        print('%s %s' % (concept, row_text))
    mean_perf = results.mean(0)
    mean_text = ' '.join([niceNumber(x, 3) for x in mean_perf])
    print('%s %s' % ('mean%s' % metric, mean_text))

    return concepts, results
Пример #2
0
def process(options, testCollection, trainCollection, tagsimMethod):
    """Estimate tag votes for the test images with a tag-similarity method.

    For every selected test image the image's tags are read through
    ``TagReader`` and passed to the estimator looked up in ``SIM_TO_TAGREL``;
    one line ``<image id> <tag> <vote> <tag> <vote> ...`` is written per
    image to the result file.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``testset``,
            ``numjobs``, ``job`` and, optionally, ``donefile``.
        testCollection: name of the collection whose images are processed.
        trainCollection: name of the collection handed to the estimator.
        tagsimMethod: key into ``SIM_TO_TAGREL``; ``'wns'`` gets an extra
            ``"wup"`` argument and a result path without ``trainCollection``.

    Returns:
        None.  The process exits with status 0 via ``sys.exit`` when
        ``checkToSkip`` reports that the result file must not be rewritten.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    testsetName = options.testset if options.testset else testCollection
    numjobs = options.numjobs
    job = options.job
    useWnVob = 1

    outputName = tagsimMethod + '-wn' if useWnVob else tagsimMethod

    if tagsimMethod == 'wns':
        resultfile = os.path.join(rootpath, testCollection, "tagrel", testsetName, outputName, 'id.tagvotes.txt')
    else:
        resultfile = os.path.join(rootpath, testCollection, "tagrel", testsetName, trainCollection, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile = resultfile.replace("id.tagvotes.txt", "id.tagvotes.%d.%d.txt" % (numjobs, job))

    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    makedirsforfile(resultfile)

    # Ids listed in the optional done file are excluded from this run.  A
    # missing or unreadable done file means "nothing done yet", but it no
    # longer hides unrelated errors, and a blank line no longer throws away
    # the whole set.
    doneset = set()
    donefile = getattr(options, 'donefile', None)
    if donefile:
        try:
            with open(donefile) as fin:
                # The last line is deliberately ignored, as in the original
                # code (presumably it may be incomplete -- TODO confirm).
                lines = fin.readlines()[:-1]
        except (IOError, OSError, TypeError):
            printStatus(INFO, "cannot read done file %s, ignoring it" % donefile)
            lines = []
        doneset = set([line.split()[0] for line in lines if line.strip()])

    printStatus(INFO, "done set: %d" % len(doneset))

    # NOTE(review): the image set is read for testCollection although the
    # result path uses testsetName -- confirm this is intended.
    testImageSet = readImageSet(testCollection, testCollection, rootpath)
    testImageSet = [x for x in testImageSet if x not in doneset]
    # Round-robin sharding: job numbers are 1-based.
    testImageSet = [x for i, x in enumerate(testImageSet) if (i % numjobs + 1) == job]
    printStatus(INFO, 'working on %d-%d, %d test images -> %s' % (numjobs, job, len(testImageSet), resultfile))

    testreader = TagReader(testCollection, rootpath=rootpath)

    if tagsimMethod == "wns":
        tagrel = SIM_TO_TAGREL["wns"](trainCollection, useWnVob, "wup", rootpath)
    else:
        tagrel = SIM_TO_TAGREL[tagsimMethod](trainCollection, useWnVob, rootpath)

    done = 0
    # The context manager closes the result file even if estimation fails.
    with open(resultfile, "w") as fw:
        for qry_id in testImageSet:
            qry_tags = testreader.get(qry_id)
            tagvotes = tagrel.estimate(qry_tags)
            newline = qry_id + " " + " ".join(["%s %s" % (tag, niceNumber(vote, 8)) for (tag, vote) in tagvotes])
            fw.write(newline + "\n")
            done += 1
            if done % 1000 == 0:
                printStatus(INFO, "%d done" % done)

    printStatus(INFO, "%d done" % done)
Пример #3
0
def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Tag the test images with a ``ModelArray`` of fastlinear models.

    The feature file of ``testCollection`` is streamed once; for every image
    assigned to this job the model array's prediction is written as one line
    ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``numjobs``,
            ``job`` and ``topk``.
        testCollection: collection whose features are read and tagged.
        trainCollection: collection the models were trained on.
        trainAnnotationName: annotation name identifying the models.
        feature: feature name (sub-directory of ``FeatureData``).
        modelName: model name; must start with ``'fastlinear'``.

    Returns:
        Number of images written, or 0 when ``checkToSkip`` says to skip.
    """
    assert(modelName.startswith('fastlinear'))

    rootpath = options.rootpath
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job
    topk = options.topk

    outputName = '%s,%s' % (feature, modelName)

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, trainAnnotationName, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        return 0

    # The concept list itself is not used below; the call is kept so that any
    # error it raises still surfaces before output is created.
    readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)

    all_images = readImageSet(testCollection, testCollection, rootpath)
    # Round-robin sharding (job numbers are 1-based); a set gives fast
    # membership tests while streaming the feature file.
    test_imset = set(x for i, x in enumerate(all_images) if i % numjobs + 1 == job)
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, len(test_imset), resultfile))

    ma = ModelArray(trainCollection, trainAnnotationName, feature, modelName, rootpath=rootpath)

    feat_file = StreamFile(os.path.join(rootpath, testCollection, "FeatureData", feature))
    makedirsforfile(resultfile)

    done = 0
    # Both the result file and the feature stream are released even when a
    # prediction raises.
    with open(resultfile, "w") as fw:
        feat_file.open()
        try:
            for _id, _vec in feat_file:
                if _id not in test_imset:
                    continue

                tagvotes = ma.predict([_vec], prob=0)[0]
                if topk > 0:
                    tagvotes = tagvotes[:topk]
                fw.write('%s %s\n' % (_id, " ".join(["%s %s" % (tag, niceNumber(vote, 6)) for (tag, vote) in tagvotes])))
                done += 1
                if done % 10000 == 0:
                    printStatus(INFO, "%d done" % done)
        finally:
            feat_file.close()

    printStatus(INFO, "%d done" % (done))
    return done
Пример #4
0
def process(options, trainCollection, annotationName, testCollection):
    """Tag the test images with a tag co-occurrence tagger.

    For every image of ``testCollection`` the image's user tags are read via
    ``TagReader`` and passed, together with the image id, to the tagger's
    ``predict``; one line ``<image id> <tag> <vote> <tag> <vote> ...`` is
    written per image.

    Args:
        options: object exposing ``rootpath``, ``m``, ``kr``, ``kd``, ``ks``,
            ``kc``, ``feature``, ``bonus`` and ``overwrite``.
        trainCollection: collection handed to the tagger.
        annotationName: annotation name handed to the tagger.
        testCollection: collection whose images are tagged.

    Returns:
        None.  The process exits with status 0 via ``sys.exit`` when
        ``checkToSkip`` reports that the result file must not be rewritten.
    """
    rootpath = options.rootpath
    m = options.m
    k_r = options.kr
    k_d = options.kd
    k_s = options.ks
    k_c = options.kc
    feature = options.feature
    add_bonus = options.bonus
    overwrite = options.overwrite

    # The output name is kept short to limit the length of the result path;
    # the feature name is appended only when k_c is above 1e-6.
    outputName = 'cotag'
    outputName = os.path.join(outputName, feature) if (k_c > 1e-6) else outputName
    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, annotationName, outputName, 'id.tagvotes.txt')

    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    testImageSet = readImageSet(testCollection, testCollection, rootpath=rootpath)
    test_tag_reader = TagReader(testCollection, rootpath=rootpath)

    if k_c < 1e-6:
        tagger = TagCooccurTagger(testCollection, trainCollection, annotationName, rootpath=rootpath)
    else:
        tagger = TagCooccurPlusTagger(testCollection, trainCollection, annotationName, feature=feature, rootpath=rootpath)
    tagger.m = m
    tagger.k_r = k_r
    tagger.k_d = k_d
    tagger.k_s = k_s
    tagger.k_c = k_c
    tagger.add_bonus = add_bonus

    makedirsforfile(resultfile)

    done = 0
    # The context manager closes the result file even if tagging fails.
    with open(resultfile, 'w') as fw:
        output = []
        for im in testImageSet:
            user_tags = test_tag_reader.get(im)
            tagvotes = tagger.predict(content=im, context=user_tags)
            output.append('%s %s' % (im, ' '.join(['%s %s' % (x[0], niceNumber(x[1], 6)) for x in tagvotes])))
            done += 1
            # Flush the buffered lines to disk every 10000 images.
            if len(output) % 10000 == 0:
                fw.write('\n'.join(output) + '\n')
                output = []
                printStatus(INFO, '%d done' % done)
        if output:
            fw.write('\n'.join(output) + '\n')

    printStatus(INFO, '%d done' % done)
Пример #5
0
def process(options, label_file, label2vec_dir, testCollection, feature, new_feature):
    """Convert an existing feature into a new one through ``Image2Vec``.

    Image ids are read from ``ImageSets/<subset>.txt``, their ``feature``
    vectors are fetched block by block from a ``BigFile``, each vector is
    passed to ``Image2Vec.embedding`` and the result is written as one line
    ``<image id> <v1> <v2> ...`` to ``FeatureData/<new_feature>/id.feature.txt``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``k``,
            ``blocksize`` and ``subset``.
        label_file: first argument of the ``Image2Vec`` constructor.
        label2vec_dir: second argument of the ``Image2Vec`` constructor.
        testCollection: collection whose features are converted.
        feature: name of the input feature.
        new_feature: name of the output feature.

    Returns:
        Number of images written, or 0 when ``checkToSkip`` says to skip.

    Raises:
        ValueError: if ``options.blocksize`` is smaller than 1 (the block
            loop could not make progress).
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    k = options.k
    blocksize = options.blocksize
    subset = options.subset if options.subset else testCollection

    resfile = os.path.join(rootpath, testCollection, 'FeatureData', new_feature, 'id.feature.txt')
    if checkToSkip(resfile, overwrite):
        return 0

    if blocksize < 1:
        raise ValueError('blocksize must be at least 1, got %r' % (blocksize,))

    imsetfile = os.path.join(rootpath, testCollection, 'ImageSets', '%s.txt' % subset)
    # A real list is built (len() and slicing are needed below) and the file
    # handle is closed right away.
    with open(imsetfile) as fin:
        imset = [line.strip() for line in fin]
    printStatus(INFO, '%d images to do' % len(imset))

    feat_file = BigFile(os.path.join(rootpath, testCollection, 'FeatureData', feature))

    im2vec = Image2Vec(label_file, label2vec_dir)

    makedirsforfile(resfile)

    read_time = 0
    run_time = 0
    start = 0
    done = 0

    # The context manager closes the result file even if embedding fails.
    with open(resfile, 'w') as fw:
        while start < len(imset):
            end = min(len(imset), start + blocksize)
            printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

            s_time = time.time()
            # The ids returned by the reader, not the requested ones, are
            # the ids written out.
            renamed, test_X = feat_file.read(imset[start:end])
            read_time += time.time() - s_time

            s_time = time.time()
            output = []
            for i, name in enumerate(renamed):
                vec = im2vec.embedding(test_X[i], k)
                output.append('%s %s\n' % (name, " ".join([niceNumber(x, 6) for x in vec])))
            run_time += time.time() - s_time

            start = end
            fw.write(''.join(output))
            done += len(output)

    printStatus(INFO, "%d done. read time %g seconds, run_time %g seconds" % (done, read_time, run_time))
    return done
Пример #6
0
def evaluateSearchEngines(searchers, collection, annotationName, metric, rootpath=ROOT_PATH):
    """Compute and print a concept-by-searcher performance matrix.

    The scorer comes from ``getScorer(metric)`` and the concept list from
    ``readConcepts``.  For each concept, the labels read by
    ``readAnnotationsFrom`` are looked up for the items each searcher
    returns (unlabelled items are skipped, returned order is kept) and the
    resulting label list is scored.

    Args:
        searchers: sequence of objects with a ``scoreCollection(concept)``
            method yielding ``(name, score)`` pairs.
        collection: collection identifier forwarded to the readers.
        annotationName: annotation identifier forwarded to the readers.
        metric: metric name for ``getScorer``; also part of the label of
            the printed mean row.
        rootpath: data root forwarded to the readers.

    Returns:
        Tuple ``(concepts, results)``; ``results`` is a numpy array with one
        row per concept and one column per searcher.
    """
    scorer = getScorer(metric)
    concepts = readConcepts(collection, annotationName, rootpath)

    results = np.zeros((len(concepts), len(searchers)))

    for c_idx, concept in enumerate(concepts):
        names, labels = readAnnotationsFrom(collection, annotationName, concept, rootpath)
        name2label = dict(zip(names, labels))

        for s_idx, engine in enumerate(searchers):
            sorted_labels = []
            for (name, _score) in engine.scoreCollection(concept):
                # Items without a ground-truth label do not take part.
                if name in name2label:
                    sorted_labels.append(name2label[name])
            results[c_idx, s_idx] = scorer.score(sorted_labels)

    def _format(values):
        # Space-separated numbers rendered by niceNumber with 3 digits.
        return ' '.join([niceNumber(x, 3) for x in values])

    # Each line is formatted first and printed as one string, which gives
    # the same output with the print statement and the print function.
    for c_idx, concept in enumerate(concepts):
        print('%s %s' % (concept, _format(results[c_idx, :])))
    mean_perf = results.mean(0)
    print('%s %s' % ('mean%s' % metric, _format(mean_perf)))

    return concepts, results
Пример #7
0
def process(options, testCollection, trainCollection, annotationName, feature):
    """Tag the images of a test set with a neighbour-based tagger.

    Feature vectors are read block by block from a ``BigFile``; each vector
    is passed to the tagger selected from ``NAME_TO_TAGGER`` and one line
    ``<image id> <tag> <vote> <tag> <vote> ...`` is written per image.

    Args:
        options: object exposing ``rootpath``, ``k``, ``distance``,
            ``blocksize``, ``donefile``, ``numjobs``, ``job``, ``overwrite``,
            ``tagger``, ``noise`` and ``testset``.
        testCollection: collection whose features are read.
        trainCollection: collection handed to the tagger.
        annotationName: annotation name handed to the tagger.
        feature: feature name (sub-directory of ``FeatureData``).

    Returns:
        0 when ``checkToSkip`` says to skip, otherwise None.

    Raises:
        ValueError: if ``options.blocksize`` is smaller than 1 (the block
            loop could not make progress).
    """
    rootpath = options.rootpath
    k = options.k
    distance = options.distance
    blocksize = options.blocksize
    donefile = options.donefile
    numjobs = options.numjobs
    job = options.job
    overwrite = options.overwrite
    taggerType = options.tagger
    noise = options.noise
    testset = options.testset
    if not testset:
        testset = testCollection

    modelName = taggerType
    if 'pretagvote' == taggerType and noise > 1e-3:
        modelName += '-noise%.2f' % noise
    # 'pqtagvote' always reports 'l2knn', whatever distance was requested.
    if 'pqtagvote' == taggerType:
        nnName = "l2knn"
    else:
        nnName = distance + "knn"
    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testset,
                              trainCollection, annotationName, modelName,
                              '%s,%s,%d' % (feature, nnName, k),
                              'id.tagvotes.txt')

    if numjobs > 1:
        resultfile += ".%d.%d" % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    if blocksize < 1:
        raise ValueError('blocksize must be at least 1, got %r' % (blocksize,))

    doneset = set()
    if donefile:
        # The first token of every non-blank line is an already-done id.
        with open(donefile) as fin:
            doneset = set([x.split()[0] for x in fin if x.strip()])
    printStatus(
        INFO, "%d images have been done already, and they will be ignored" %
        len(doneset))

    workingSet = readImageSet(testCollection, testset, rootpath)
    workingSet = [x for x in workingSet if x not in doneset]
    # Round-robin sharding: job numbers are 1-based.
    workingSet = [
        x for i, x in enumerate(workingSet) if (i % numjobs + 1) == job
    ]

    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData',
                                 feature)
    test_feat_file = BigFile(test_feat_dir)

    tagger = NAME_TO_TAGGER[taggerType](trainCollection,
                                        annotationName,
                                        feature,
                                        distance,
                                        rootpath=rootpath)
    tagger.k = k
    tagger.noise = noise

    printStatus(
        INFO, "working on %d-%d, %d test images -> %s" %
        (numjobs, job, len(workingSet), resultfile))

    makedirsforfile(resultfile)

    read_time = 0.0
    test_time = 0.0
    start = 0
    done = 0

    # The context manager closes the result file even if tagging fails.
    with open(resultfile, "w") as fw:
        while start < len(workingSet):
            end = min(len(workingSet), start + blocksize)
            printStatus(INFO,
                        'tagging images from %d to %d' % (start, end - 1))

            s_time = time.time()
            renamed, vectors = test_feat_file.read(workingSet[start:end])
            read_time += time.time() - s_time

            s_time = time.time()
            output = []
            for i, name in enumerate(renamed):
                tagvotes = tagger.predict(content=vectors[i],
                                          context='%s,%s' %
                                          (testCollection, name))
                output.append('%s %s\n' % (name, " ".join([
                    "%s %s" % (tag, niceNumber(vote, 6))
                    for (tag, vote) in tagvotes
                ])))
            test_time += time.time() - s_time

            start = end
            fw.write(''.join(output))
            done += len(output)

    printStatus(
        INFO, '%d images tagged, read time %g seconds, test time %g seconds' %
        (done, read_time, test_time))
Пример #8
0
def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Tag the test images with one pre-trained model per concept (streaming).

    One model file ``<concept>.model`` is loaded per concept.  The feature
    file of ``testCollection`` is streamed once; every image assigned to
    this job is scored by all models, the concepts are sorted by decreasing
    score and written as ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``,
            ``prob_output``, ``numjobs``, ``job`` and ``topk``.
        testCollection: collection whose features are read and tagged.
        trainCollection: collection the models were trained on.
        trainAnnotationName: annotation name identifying the models.
        feature: feature name (sub-directory of ``FeatureData``).
        modelName: model name; a name starting with ``'fik'`` selects the
            fiksvm loader, anything else the fastlinear loader.

    Returns:
        Number of images written; 0 when ``checkToSkip`` says to skip or
        when a model loader returns None.
    """
    # Only the loader for the requested model family is imported.
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
    else:
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    topk = options.topk

    outputName = '%s,%s' % (feature, modelName)
    if prob_output:
        outputName += ',prob'

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, trainAnnotationName, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        return 0

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)

    all_images = readImageSet(testCollection, testCollection, rootpath)
    # Round-robin sharding (job numbers are 1-based); a set gives fast
    # membership tests while streaming the feature file.
    test_imset = set(x for i, x in enumerate(all_images) if i % numjobs + 1 == job)
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, len(test_imset), resultfile))

    models = []
    for concept in concepts:
        model_file_name = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName, '%s.model' % concept)
        model = load_model(model_file_name)
        if model is None:
            # Give up before any output is created.
            return 0
        models.append(model)

    feat_file = StreamFile(os.path.join(rootpath, testCollection, "FeatureData", feature))
    makedirsforfile(resultfile)

    done = 0
    # Both the result file and the feature stream are released even when a
    # prediction raises.
    with open(resultfile, "w") as fw:
        feat_file.open()
        try:
            for _id, _vec in feat_file:
                if _id not in test_imset:
                    continue
                if prob_output:
                    scores = [m.predict_probability(_vec) for m in models]
                else:
                    scores = [m.predict(_vec) for m in models]

                tagvotes = sorted(zip(concepts, scores), key=lambda v: v[1], reverse=True)
                if topk > 0:
                    tagvotes = tagvotes[:topk]
                fw.write('%s %s\n' % (_id, " ".join(["%s %s" % (tag, niceNumber(vote, 6)) for (tag, vote) in tagvotes])))
                done += 1
                if done % 10000 == 0:
                    printStatus(INFO, "%d done" % done)
        finally:
            feat_file.close()

    printStatus(INFO, "%d done" % (done))
    return done
Пример #9
0
def process(options, trainCollection, annotationName, testCollection):
    """Run a tag co-occurrence tagger over every image of the test collection.

    The user tags of each image (from ``TagReader``) and the image id are
    given to the tagger's ``predict``; the returned tag votes are written as
    ``<image id> <tag> <vote> <tag> <vote> ...``, one line per image.

    Args:
        options: object exposing ``rootpath``, ``m``, ``kr``, ``kd``, ``ks``,
            ``kc``, ``feature``, ``bonus`` and ``overwrite``.
        trainCollection: collection handed to the tagger.
        annotationName: annotation name handed to the tagger.
        testCollection: collection whose images are tagged.

    Returns:
        None.  The process exits with status 0 via ``sys.exit`` when
        ``checkToSkip`` reports that the result file must not be rewritten.
    """
    rootpath = options.rootpath
    m = options.m
    k_r = options.kr
    k_d = options.kd
    k_s = options.ks
    k_c = options.kc
    feature = options.feature
    add_bonus = options.bonus
    overwrite = options.overwrite

    # A short output name keeps the result path short; the feature name is
    # appended only when k_c is above 1e-6.
    outputName = 'cotag'
    if k_c > 1e-6:
        outputName = os.path.join(outputName, feature)
    resultfile = os.path.join(rootpath, testCollection, 'autotagging',
                              testCollection, trainCollection, annotationName,
                              outputName, 'id.tagvotes.txt')

    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    testImageSet = readImageSet(testCollection,
                                testCollection,
                                rootpath=rootpath)
    test_tag_reader = TagReader(testCollection, rootpath=rootpath)

    if k_c < 1e-6:
        tagger = TagCooccurTagger(testCollection,
                                  trainCollection,
                                  annotationName,
                                  rootpath=rootpath)
    else:
        tagger = TagCooccurPlusTagger(testCollection,
                                      trainCollection,
                                      annotationName,
                                      feature=feature,
                                      rootpath=rootpath)
    tagger.m = m
    tagger.k_r = k_r
    tagger.k_d = k_d
    tagger.k_s = k_s
    tagger.k_c = k_c
    tagger.add_bonus = add_bonus

    makedirsforfile(resultfile)

    done = 0
    fw = open(resultfile, 'w')
    try:
        output = []
        for im in testImageSet:
            user_tags = test_tag_reader.get(im)
            tagvotes = tagger.predict(content=im, context=user_tags)
            votes_text = ' '.join(
                ['%s %s' % (x[0], niceNumber(x[1], 6)) for x in tagvotes])
            output.append('%s %s' % (im, votes_text))
            done += 1
            # Write the buffered lines out every 10000 images.
            if len(output) % 10000 == 0:
                fw.write('\n'.join(output) + '\n')
                output = []
                printStatus(INFO, '%d done' % done)
        if output:
            fw.write('\n'.join(output) + '\n')
    finally:
        # Close the result file even if tagging fails part-way.
        fw.close()

    printStatus(INFO, '%d done' % done)
Пример #10
0
def process(options, testCollection, trainCollection, trainAnnotationName,
            feature, modelName):
    """Tag the test images with one pre-trained model per concept (batched).

    One model file ``<concept>.model`` is loaded per concept.  Feature
    vectors of the images assigned to this job are read block by block from
    a ``BigFile``; every vector is scored by all models, the concepts are
    sorted by decreasing score and written as
    ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``,
            ``prob_output``, ``numjobs``, ``job`` and ``blocksize``.
        testCollection: collection whose features are read and tagged.
        trainCollection: collection the models were trained on.
        trainAnnotationName: annotation name identifying the models.
        feature: feature name (sub-directory of ``FeatureData``).
        modelName: model name; a name starting with ``'fik'`` selects the
            fiksvm loader, anything else the fastlinear loader.

    Returns:
        Number of images written; 0 when ``checkToSkip`` says to skip or
        when a model loader returns None.

    Raises:
        ValueError: if ``options.blocksize`` is smaller than 1 (the block
            loop could not make progress).
    """
    # Only the loader for the requested model family is imported.
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
    else:
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    outputName = '%s,%s' % (feature, modelName)
    if prob_output:
        outputName += ',prob'

    resultfile = os.path.join(rootpath, testCollection, 'autotagging',
                              testCollection, trainCollection,
                              trainAnnotationName, outputName,
                              'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        return 0

    if blocksize < 1:
        raise ValueError('blocksize must be at least 1, got %r' % (blocksize,))

    concepts = readConcepts(trainCollection,
                            trainAnnotationName,
                            rootpath=rootpath)

    all_images = readImageSet(testCollection, testCollection, rootpath)
    # Round-robin sharding: job numbers are 1-based.
    test_imset = [
        x for i, x in enumerate(all_images) if i % numjobs + 1 == job
    ]
    nr_of_test_images = len(test_imset)
    printStatus(
        INFO, "working on %d-%d, %d test images -> %s" %
        (numjobs, job, nr_of_test_images, resultfile))

    models = []
    for concept in concepts:
        model_file_name = os.path.join(rootpath, trainCollection, 'Models',
                                       trainAnnotationName, feature, modelName,
                                       '%s.model' % concept)
        model = load_model(model_file_name)
        if model is None:
            # Give up before any output is created.
            return 0
        models.append(model)

    feat_file = BigFile(
        os.path.join(rootpath, testCollection, "FeatureData", feature))
    makedirsforfile(resultfile)

    read_time = 0
    test_time = 0
    start = 0
    done = 0

    # The context manager closes the result file even if prediction fails.
    with open(resultfile, "w") as fw:
        while start < nr_of_test_images:
            end = min(nr_of_test_images, start + blocksize)
            printStatus(INFO,
                        'processing images from %d to %d' % (start, end - 1))

            s_time = time.time()
            renamed, test_X = feat_file.read(test_imset[start:end])
            read_time += time.time() - s_time

            s_time = time.time()
            output = []
            for i, name in enumerate(renamed):
                if prob_output:
                    scores = [m.predict_probability(test_X[i]) for m in models]
                else:
                    scores = [m.predict(test_X[i]) for m in models]
                tagvotes = sorted(zip(concepts, scores),
                                  key=lambda v: v[1],
                                  reverse=True)
                output.append('%s %s\n' % (name, " ".join([
                    "%s %s" % (tag, niceNumber(vote, 6))
                    for (tag, vote) in tagvotes
                ])))
            test_time += time.time() - s_time

            start = end
            fw.write(''.join(output))
            # Flush after every block so finished blocks reach the file.
            fw.flush()
            done += len(output)

    printStatus(
        INFO, "%d done. read time %g seconds, test_time %g seconds" %
        (done, read_time, test_time))
    return done
Пример #11
0
def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Apply per-concept models to the test images, block by block.

    A model is loaded from ``<concept>.model`` for each concept.  The
    features of the images assigned to this job are fetched from a
    ``BigFile`` in blocks of ``options.blocksize``; each vector is scored by
    every model and the concepts, sorted by decreasing score, are written as
    ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``,
            ``prob_output``, ``numjobs``, ``job`` and ``blocksize``.
        testCollection: collection whose features are read and tagged.
        trainCollection: collection the models were trained on.
        trainAnnotationName: annotation name identifying the models.
        feature: feature name (sub-directory of ``FeatureData``).
        modelName: model name; a name starting with ``'fik'`` selects the
            fiksvm loader, anything else the fastlinear loader.

    Returns:
        Number of images written; 0 when ``checkToSkip`` says to skip or
        when a model loader returns None.

    Raises:
        ValueError: if ``options.blocksize`` is smaller than 1 (the block
            loop could not make progress).
    """
    # Only the loader for the requested model family is imported.
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
    else:
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    outputName = '%s,%s' % (feature, modelName)
    if prob_output:
        outputName += ',prob'

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, trainAnnotationName, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        return 0

    if blocksize < 1:
        raise ValueError('blocksize must be at least 1, got %r' % (blocksize,))

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)

    all_images = readImageSet(testCollection, testCollection, rootpath)
    # Round-robin sharding: job numbers are 1-based.
    test_imset = [x for i, x in enumerate(all_images) if i % numjobs + 1 == job]
    nr_of_test_images = len(test_imset)
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, nr_of_test_images, resultfile))

    models = []
    for concept in concepts:
        model_file_name = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName, '%s.model' % concept)
        model = load_model(model_file_name)
        if model is None:
            # Give up before any output is created.
            return 0
        models.append(model)

    feat_file = BigFile(os.path.join(rootpath, testCollection, "FeatureData", feature))
    makedirsforfile(resultfile)

    read_time = 0
    test_time = 0
    start = 0
    done = 0

    fw = open(resultfile, "w")
    try:
        while start < nr_of_test_images:
            end = min(nr_of_test_images, start + blocksize)
            printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

            s_time = time.time()
            renamed, test_X = feat_file.read(test_imset[start:end])
            read_time += time.time() - s_time

            s_time = time.time()
            output = []
            for i, name in enumerate(renamed):
                if prob_output:
                    scores = [m.predict_probability(test_X[i]) for m in models]
                else:
                    scores = [m.predict(test_X[i]) for m in models]
                tagvotes = sorted(zip(concepts, scores), key=lambda v: v[1], reverse=True)
                output.append('%s %s\n' % (name, " ".join(["%s %s" % (tag, niceNumber(vote, 6)) for (tag, vote) in tagvotes])))
            test_time += time.time() - s_time

            start = end
            fw.write(''.join(output))
            # Flush after every block so finished blocks reach the file.
            fw.flush()
            done += len(output)
    finally:
        # Close the result file even if prediction fails part-way.
        fw.close()

    printStatus(INFO, "%d done. read time %g seconds, test_time %g seconds" % (done, read_time, test_time))
    return done
Пример #12
0
def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Apply per-concept models to the test images while streaming features.

    A model is loaded from ``<concept>.model`` for each concept.  The
    feature file of ``testCollection`` is iterated once through a
    ``StreamFile``; images not assigned to this job are skipped, the others
    are scored by every model and the concepts, sorted by decreasing score
    (optionally truncated to ``topk``), are written as
    ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``,
            ``prob_output``, ``numjobs``, ``job`` and ``topk``.
        testCollection: collection whose features are read and tagged.
        trainCollection: collection the models were trained on.
        trainAnnotationName: annotation name identifying the models.
        feature: feature name (sub-directory of ``FeatureData``).
        modelName: model name; a name starting with ``'fik'`` selects the
            fiksvm loader, anything else the fastlinear loader.

    Returns:
        Number of images written; 0 when ``checkToSkip`` says to skip or
        when a model loader returns None.
    """
    # Only the loader for the requested model family is imported.
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
    else:
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    topk = options.topk

    outputName = '%s,%s' % (feature, modelName)
    if prob_output:
        outputName += ',prob'

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, trainAnnotationName, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        return 0

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)

    # Round-robin sharding (job numbers are 1-based); a set gives fast
    # membership tests while streaming the feature file.
    test_imset = set()
    for i, image_id in enumerate(readImageSet(testCollection, testCollection, rootpath)):
        if i % numjobs + 1 == job:
            test_imset.add(image_id)
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, len(test_imset), resultfile))

    models = []
    for concept in concepts:
        model_file_name = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName, '%s.model' % concept)
        model = load_model(model_file_name)
        if model is None:
            # Give up before any output is created.
            return 0
        models.append(model)

    feat_file = StreamFile(os.path.join(rootpath, testCollection, "FeatureData", feature))
    makedirsforfile(resultfile)

    done = 0
    fw = open(resultfile, "w")
    try:
        feat_file.open()
        try:
            for _id, _vec in feat_file:
                if _id not in test_imset:
                    continue
                if prob_output:
                    scores = [m.predict_probability(_vec) for m in models]
                else:
                    scores = [m.predict(_vec) for m in models]

                tagvotes = sorted(zip(concepts, scores), key=lambda v: v[1], reverse=True)
                if topk > 0:
                    tagvotes = tagvotes[:topk]
                fw.write('%s %s\n' % (_id, " ".join(["%s %s" % (tag, niceNumber(vote, 6)) for (tag, vote) in tagvotes])))
                done += 1
                if done % 10000 == 0:
                    printStatus(INFO, "%d done" % done)
        finally:
            # Release the feature stream even if a prediction raises.
            feat_file.close()
    finally:
        # Likewise close the result file on every path.
        fw.close()

    printStatus(INFO, "%d done" % (done))
    return done
Пример #13
0
def process(options, testCollection, trainCollection, trainAnnotationName,
            feature, modelName):
    """Predict tag votes for the test images with a fastlinear ``ModelArray``.

    The feature file of ``testCollection`` is iterated once through a
    ``StreamFile``; images not assigned to this job are skipped, for the
    others the first element of ``ModelArray.predict([vec], prob=0)`` is
    taken as the tag votes (optionally truncated to ``topk``) and written as
    ``<image id> <tag> <vote> <tag> <vote> ...``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``numjobs``,
            ``job`` and ``topk``.
        testCollection: collection whose features are read and tagged.
        trainCollection: collection the models were trained on.
        trainAnnotationName: annotation name identifying the models.
        feature: feature name (sub-directory of ``FeatureData``).
        modelName: model name; must start with ``'fastlinear'``.

    Returns:
        Number of images written, or 0 when ``checkToSkip`` says to skip.
    """
    assert (modelName.startswith('fastlinear'))

    rootpath = options.rootpath
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job
    topk = options.topk

    outputName = '%s,%s' % (feature, modelName)

    resultfile = os.path.join(rootpath, testCollection, 'autotagging',
                              testCollection, trainCollection,
                              trainAnnotationName, outputName,
                              'id.tagvotes.txt')
    if numjobs > 1:
        resultfile += '.%d.%d' % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        return 0

    # The concept list itself is not used below; the call is kept so that any
    # error it raises still surfaces before output is created.
    readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)

    # Round-robin sharding (job numbers are 1-based); a set gives fast
    # membership tests while streaming the feature file.
    test_imset = set()
    for i, image_id in enumerate(
            readImageSet(testCollection, testCollection, rootpath)):
        if i % numjobs + 1 == job:
            test_imset.add(image_id)
    printStatus(
        INFO, "working on %d-%d, %d test images -> %s" %
        (numjobs, job, len(test_imset), resultfile))

    ma = ModelArray(trainCollection,
                    trainAnnotationName,
                    feature,
                    modelName,
                    rootpath=rootpath)

    feat_file = StreamFile(
        os.path.join(rootpath, testCollection, "FeatureData", feature))
    makedirsforfile(resultfile)

    done = 0
    fw = open(resultfile, "w")
    try:
        feat_file.open()
        try:
            for _id, _vec in feat_file:
                if _id not in test_imset:
                    continue

                tagvotes = ma.predict([_vec], prob=0)[0]
                if topk > 0:
                    tagvotes = tagvotes[:topk]
                votes_text = " ".join([
                    "%s %s" % (tag, niceNumber(vote, 6))
                    for (tag, vote) in tagvotes
                ])
                fw.write('%s %s\n' % (_id, votes_text))
                done += 1
                if done % 10000 == 0:
                    printStatus(INFO, "%d done" % done)
        finally:
            # Release the feature stream even if a prediction raises.
            feat_file.close()
    finally:
        # Likewise close the result file on every path.
        fw.close()

    printStatus(INFO, "%d done" % (done))
    return done
Пример #14
0
    for i in range(len(renamed)):
        test_id = renamed[i]
        for concept,score in res[i]:
            ranklist.setdefault(concept,[]).append((test_id,score))
          

    # evaluation
    concepts = readConcepts(testCollection,testAnnotationName,rootpath=rootpath)
 
    from basic.metric import getScorer
    scorer = getScorer('AP')
    mean_perf = 0.0

    from basic.annotationtable import readAnnotationsFrom
    from basic.common import niceNumber
    for concept in concepts:
        names,labels = readAnnotationsFrom(testCollection,testAnnotationName,concept,skip_0=True,rootpath=rootpath)
        name2label = dict(zip(names,labels))
        imagelist = ranklist[concept]
        imagelist.sort(key=lambda v:(v[1],v[0]), reverse=True)
        #print concept, imagelist[:3], imagelist[-3:]
        sorted_labels = [name2label[_id] for _id,_score in imagelist if _id in name2label]
        perf = scorer.score(sorted_labels)
        print concept, niceNumber(perf,3)
        mean_perf += perf
    mean_perf /= len(concepts)
    print 'MEAN', niceNumber(mean_perf,3)