def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Autotag every test image by streaming its feature vector and scoring it
    against one pre-trained model per concept.

    Writes one line per image -- "imageid tag1 score1 tag2 score2 ..." with tags
    sorted by descending score (truncated to options.topk when topk > 0) -- to
    a result file under rootpath/testCollection/autotagging/... .

    Returns the number of images written, or 0 when the result file already
    exists (and overwrite is off) or any concept model fails to load.
    """
    # Model loader depends on the model family encoded in the model name.
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
    else:
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    #blocksize = options.blocksize
    topk = options.topk

    outputName = '%s,%s' % (feature, modelName)
    if prob_output:
        outputName += ',prob'

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection,
                              trainCollection, trainAnnotationName, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        # Parallel jobs each write their own shard of the result.
        resultfile += '.%d.%d' % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)
    nr_of_concepts = len(concepts)

    # This job handles every numjobs-th image (job is 1-based); membership is
    # tested per streamed feature, so build the set directly.
    test_imset = readImageSet(testCollection, testCollection, rootpath)
    test_imset = set(test_imset[i] for i in range(len(test_imset)) if i % numjobs + 1 == job)
    nr_of_test_images = len(test_imset)
    printStatus(INFO, "working on %d-%d, %d test images -> %s"
                % (numjobs, job, nr_of_test_images, resultfile))

    models = [None] * nr_of_concepts
    for c in range(nr_of_concepts):
        model_file_name = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName,
                                       feature, modelName, '%s.model' % concepts[c])
        models[c] = load_model(model_file_name)
        if models[c] is None:
            # Abort: a missing model would leave holes in every tagvote line.
            return 0
        #(pA,pB) = model.get_probAB()

    feat_file = StreamFile(os.path.join(rootpath, testCollection, "FeatureData", feature))

    makedirsforfile(resultfile)
    fw = open(resultfile, "w")
    done = 0
    feat_file.open()
    try:
        for _id, _vec in feat_file:
            if _id not in test_imset:
                continue
            if prob_output:
                scores = [models[c].predict_probability(_vec) for c in range(nr_of_concepts)]
            else:
                scores = [models[c].predict(_vec) for c in range(nr_of_concepts)]
            tagvotes = sorted(zip(concepts, scores), key=lambda v: v[1], reverse=True)
            if topk > 0:
                tagvotes = tagvotes[:topk]
            newline = '%s %s\n' % (_id, " ".join(["%s %s" % (tag, niceNumber(vote, 6))
                                                  for (tag, vote) in tagvotes]))
            fw.write(newline)
            done += 1
            # Progress heartbeat every 10k images (original used the float
            # literal 1e4 in an integer modulus).
            if done % 10000 == 0:
                printStatus(INFO, "%d done" % done)
    finally:
        # Close both handles even if streaming or prediction raises.
        feat_file.close()
        fw.close()

    printStatus(INFO, "%d done" % (done))
    return done
from fiksvm.fiksvm import fiksvm_load_model as load_model else: from fastlinear.fastlinear import fastlinear_load_model as load_model test_imset = readImageSet(testCollection, testCollection, rootpath=rootpath) test_feat_file = BigFile(os.path.join(rootpath,testCollection,'FeatureData',feature)) test_renamed, test_vectors = test_feat_file.read(test_imset) concepts = readConcepts(testCollection, testAnnotationName, rootpath=rootpath) print ('### %s' % os.path.join(trainCollection, 'Models', trainAnnotationName, feature, modelName)) results = [] for concept in concepts: model_file_name = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName, '%s.model' % concept) model = load_model(model_file_name) ranklist = [(test_renamed[i], model.predict(test_vectors[i])) for i in range(len(test_renamed))] ranklist.sort(key=lambda v:v[1], reverse=True) names,labels = readAnnotationsFrom(testCollection, testAnnotationName, concept, skip_0=True, rootpath=rootpath) test_name2label = dict(zip(names,labels)) sorted_labels = [test_name2label[x[0]] for x in ranklist if x[0] in test_name2label] perf = scorer.score(sorted_labels) print ('%s %g' % (concept, perf)) results.append((concept, perf)) mean_perf = sum([x[1] for x in results]) / len(concepts) print ('mean%s %g' % (metric, mean_perf))
def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Autotag test images in blocks of options.blocksize.

    Reads feature vectors with BigFile.read in chunks, scores each image
    against one pre-trained model per concept, and writes one line per image
    -- "imageid tag1 score1 tag2 score2 ..." sorted by descending score -- to
    the result file under rootpath/testCollection/autotagging/... .

    Returns the number of images written, or 0 when the result file already
    exists (and overwrite is off) or any concept model fails to load.
    """
    # Model loader depends on the model family encoded in the model name.
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
    else:
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    outputName = '%s,%s' % (feature, modelName)
    if prob_output:
        outputName += ',prob'

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection,
                              trainCollection, trainAnnotationName, outputName,
                              'id.tagvotes.txt')
    if numjobs > 1:
        # Parallel jobs each write their own shard of the result.
        resultfile += '.%d.%d' % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)
    nr_of_concepts = len(concepts)

    # This job handles every numjobs-th image (job is 1-based).
    test_imset = readImageSet(testCollection, testCollection, rootpath)
    test_imset = [test_imset[i] for i in range(len(test_imset)) if i % numjobs + 1 == job]
    nr_of_test_images = len(test_imset)
    printStatus(
        INFO, "working on %d-%d, %d test images -> %s" %
        (numjobs, job, nr_of_test_images, resultfile))

    models = [None] * nr_of_concepts
    for c in range(nr_of_concepts):
        model_file_name = os.path.join(rootpath, trainCollection, 'Models',
                                       trainAnnotationName, feature, modelName,
                                       '%s.model' % concepts[c])
        models[c] = load_model(model_file_name)
        if models[c] is None:
            # Abort: a missing model would leave holes in every tagvote line.
            return 0
        #(pA,pB) = model.get_probAB()

    feat_file = BigFile(
        os.path.join(rootpath, testCollection, "FeatureData", feature))

    makedirsforfile(resultfile)
    fw = open(resultfile, "w")

    read_time = 0
    test_time = 0
    start = 0
    done = 0

    try:
        while start < nr_of_test_images:
            end = min(nr_of_test_images, start + blocksize)
            printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

            s_time = time.time()
            renamed, test_X = feat_file.read(test_imset[start:end])
            read_time += time.time() - s_time

            s_time = time.time()
            output = [None] * len(renamed)
            # range (not Python-2-only xrange): the rest of the code is 2/3
            # compatible and uses range throughout.
            for i, name in enumerate(renamed):
                if prob_output:
                    scores = [
                        models[c].predict_probability(test_X[i])
                        for c in range(nr_of_concepts)
                    ]
                else:
                    scores = [
                        models[c].predict(test_X[i])
                        for c in range(nr_of_concepts)
                    ]
                #dec_value = sigmoid_predict(dec_value, A=pA, B=pB)
                tagvotes = sorted(zip(concepts, scores),
                                  key=lambda v: v[1],
                                  reverse=True)
                output[i] = '%s %s\n' % (name, " ".join([
                    "%s %s" % (tag, niceNumber(vote, 6))
                    for (tag, vote) in tagvotes
                ]))
            test_time += time.time() - s_time
            start = end
            fw.write(''.join(output))
            fw.flush()
            done += len(output)
    finally:
        # Close the result file even if reading or prediction raises.
        fw.close()

    # done
    printStatus(
        INFO, "%d done. read time %g seconds, test_time %g seconds" %
        (done, read_time, test_time))
    return done
def process(options, testCollection, trainCollection, trainAnnotationName, feature, modelName):
    """Autotag test images in blocks of options.blocksize.

    Reads feature vectors with BigFile.read in chunks, scores each image
    against one pre-trained model per concept, and writes one line per image
    -- "imageid tag1 score1 tag2 score2 ..." sorted by descending score -- to
    the result file under rootpath/testCollection/autotagging/... .

    Returns the number of images written, or 0 when the result file already
    exists (and overwrite is off) or any concept model fails to load.
    """
    # Model loader depends on the model family encoded in the model name.
    if modelName.startswith('fik'):
        from fiksvm.fiksvm import fiksvm_load_model as load_model
    else:
        from fastlinear.fastlinear import fastlinear_load_model as load_model

    rootpath = options.rootpath
    overwrite = options.overwrite
    prob_output = options.prob_output
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    outputName = '%s,%s' % (feature,modelName)
    if prob_output:
        outputName += ',prob'

    resultfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection, trainCollection, trainAnnotationName, outputName, 'id.tagvotes.txt')
    if numjobs > 1:
        # Parallel jobs each write their own shard of the result.
        resultfile += '.%d.%d' % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    concepts = readConcepts(trainCollection, trainAnnotationName, rootpath=rootpath)
    nr_of_concepts = len(concepts)

    # This job handles every numjobs-th image (job is 1-based).
    test_imset = readImageSet(testCollection, testCollection, rootpath)
    test_imset = [test_imset[i] for i in range(len(test_imset)) if i % numjobs + 1 == job]
    nr_of_test_images = len(test_imset)
    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs, job, nr_of_test_images, resultfile))

    models = [None] * nr_of_concepts
    for c in range(nr_of_concepts):
        model_file_name = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName, '%s.model' % concepts[c])
        models[c] = load_model(model_file_name)
        if models[c] is None:
            # Abort: a missing model would leave holes in every tagvote line.
            return 0
        #(pA,pB) = model.get_probAB()

    feat_file = BigFile(os.path.join(rootpath, testCollection, "FeatureData", feature))

    makedirsforfile(resultfile)
    fw = open(resultfile, "w")

    read_time = 0
    test_time = 0
    start = 0
    done = 0

    try:
        while start < nr_of_test_images:
            end = min(nr_of_test_images, start + blocksize)
            printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

            s_time = time.time()
            renamed, test_X = feat_file.read(test_imset[start:end])
            read_time += time.time() - s_time

            s_time = time.time()
            output = [None] * len(renamed)
            # range (not Python-2-only xrange): the rest of the code is 2/3
            # compatible and uses range throughout.
            for i in range(len(renamed)):
                if prob_output:
                    scores = [models[c].predict_probability(test_X[i]) for c in range(nr_of_concepts)]
                else:
                    scores = [models[c].predict(test_X[i]) for c in range(nr_of_concepts)]
                #dec_value = sigmoid_predict(dec_value, A=pA, B=pB)
                tagvotes = sorted(zip(concepts, scores), key=lambda v: v[1], reverse=True)
                output[i] = '%s %s\n' % (renamed[i], " ".join(["%s %s" % (tag, niceNumber(vote, 6)) for (tag, vote) in tagvotes]))
            test_time += time.time() - s_time
            start = end
            fw.write(''.join(output))
            fw.flush()
            done += len(output)
    finally:
        # Close the result file even if reading or prediction raises.
        fw.close()

    # done
    printStatus(INFO, "%d done. read time %g seconds, test_time %g seconds" % (done, read_time, test_time))
    return done
os.path.join(rootpath, testCollection, 'FeatureData', feature)) test_renamed, test_vectors = test_feat_file.read(test_imset) concepts = readConcepts(testCollection, testAnnotationName, rootpath=rootpath) print('### %s' % os.path.join(trainCollection, 'Models', trainAnnotationName, feature, modelName)) results = [] for concept in concepts: model_file_name = os.path.join(rootpath, trainCollection, 'Models', trainAnnotationName, feature, modelName, '%s.model' % concept) model = load_model(model_file_name) ranklist = [(test_renamed[i], model.predict(test_vectors[i])) for i in range(len(test_renamed))] ranklist.sort(key=lambda v: v[1], reverse=True) names, labels = readAnnotationsFrom(testCollection, testAnnotationName, concept, skip_0=True, rootpath=rootpath) test_name2label = dict(zip(names, labels)) sorted_labels = [ test_name2label[x[0]] for x in ranklist if x[0] in test_name2label ] perf = scorer.score(sorted_labels)