import os
import sys

# Helpers such as checkToSkip, makedirsforfile, printStatus, INFO, readImageSet,
# niceNumber, TagReader, SIM_TO_TAGREL, BigFile and TagRanking come from the
# surrounding project and are assumed to be importable where these scripts live.


def process(options, testCollection, trainCollection, tagsimMethod):
    rootpath = options.rootpath
    overwrite = options.overwrite
    testsetName = options.testset if options.testset else testCollection
    tpp = options.tpp
    numjobs = options.numjobs
    job = options.job
    useWnVob = 1

    outputName = tagsimMethod + '-wn' if useWnVob else tagsimMethod

    if tagsimMethod == 'wns':
        resultfile = os.path.join(rootpath, testCollection, "tagrel", testsetName, outputName, 'id.tagvotes.txt')
    else:
        resultfile = os.path.join(rootpath, testCollection, "tagrel", testsetName, trainCollection, outputName, 'id.tagvotes.txt')

    if numjobs > 1:
        resultfile = resultfile.replace("id.tagvotes.txt", "id.tagvotes.%d.%d.txt" % (numjobs, job))

    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    makedirsforfile(resultfile)

    # Skip images already recorded in an optional donefile; its last line is
    # dropped because it may be incomplete.
    try:
        doneset = set([str.split(x)[0] for x in open(options.donefile).readlines()[:-1]])
    except:
        doneset = set()
    printStatus(INFO, "done set: %d" % len(doneset))

    testImageSet = readImageSet(testCollection, testCollection, rootpath)
    testImageSet = [x for x in testImageSet if x not in doneset]
    # Round-robin partition of the test images across parallel jobs (1-based job index).
    testImageSet = [testImageSet[i] for i in range(len(testImageSet)) if (i % numjobs + 1) == job]
    printStatus(INFO, 'working on %d-%d, %d test images -> %s' % (numjobs, job, len(testImageSet), resultfile))

    testreader = TagReader(testCollection, rootpath=rootpath)

    if tagsimMethod == "wns":
        tagrel = SIM_TO_TAGREL["wns"](trainCollection, useWnVob, "wup", rootpath)
    else:
        tagrel = SIM_TO_TAGREL[tagsimMethod](trainCollection, useWnVob, rootpath)

    done = 0
    fw = open(resultfile, "w")

    for qry_id in testImageSet:
        qry_tags = testreader.get(qry_id)
        tagvotes = tagrel.estimate(qry_tags)
        newline = qry_id + " " + " ".join(["%s %s" % (tag, niceNumber(vote, 8)) for (tag, vote) in tagvotes])
        fw.write(newline + "\n")
        done += 1
        if done % 1000 == 0:
            printStatus(INFO, "%d done" % done)

    fw.close()
    printStatus(INFO, "%d done" % done)
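# Illustration (not part of the scripts above): how the numjobs/job options split
# the test set, matching the `(i % numjobs + 1) == job` filter used in process().
# Jobs are numbered 1..numjobs and each takes every numjobs-th image in round-robin
# order; the function and variable names below are made up for this example.
def split_for_job(image_ids, numjobs, job):
    return [image_ids[i] for i in range(len(image_ids)) if (i % numjobs + 1) == job]

ids = ['im%d' % x for x in range(7)]
print(split_for_job(ids, 3, 1))  # ['im0', 'im3', 'im6']
# split_for_job(ids, 3, 2) -> ['im1', 'im4']
# split_for_job(ids, 3, 3) -> ['im2', 'im5']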
def process(options, collection):
    rootpath = options.rootpath
    tpp = options.tpp
    overwrite = options.overwrite

    resultfile = os.path.join(rootpath, collection, "tagrel", collection, 'tagpos,%s' % tpp, 'id.tagvotes.txt')
    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    imset = readImageSet(collection, collection, rootpath)
    printStatus(INFO, 'working on %d test images -> %s' % (len(imset), resultfile))

    reader = TagReader(collection, tpp=tpp, rootpath=rootpath)

    makedirsforfile(resultfile)
    fw = open(resultfile, "w")
    output = []
    done = 0

    for im in imset:
        tags = reader.get(im)

        # Keep only the first occurrence of each tag, preserving the original order.
        tagSet = set()
        tagSeq = []
        for tag in str.split(tags):
            if tag not in tagSet:
                tagSeq.append(tag)
                tagSet.add(tag)
        assert len(tagSeq) == len(tagSet)

        # Tag-position baseline: the i-th tag (0-based) gets a vote of 1 - i/n.
        nr_tags = len(tagSeq)
        tagvotes = [(tagSeq[i], 1.0 - float(i) / nr_tags) for i in range(nr_tags)]
        newline = "%s %s" % (im, " ".join(["%s %g" % (x[0], x[1]) for x in tagvotes]))
        output.append(newline + "\n")
        done += 1

        # Flush the buffered results every 10,000 images.
        if len(output) % 10000 == 0:
            printStatus(INFO, '%d %s %s' % (done, im, ' '.join(['%s:%g' % (x[0], x[1]) for x in tagvotes[:3]])))
            fw.write("".join(output))
            fw.flush()
            output = []

    if output:
        fw.write("".join(output))
    fw.close()
    printStatus(INFO, 'done')
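# Illustration (not part of the scripts above): the tagpos baseline scores the
# i-th unique tag of an image as 1 - i/n, so earlier tags receive higher votes.
# The helper name and tag string below are made up for this example.
def tagpos_votes(raw_tags):
    """Return (tag, vote) pairs for a whitespace-separated tag string."""
    seen = set()
    tags = []
    for tag in raw_tags.split():
        if tag not in seen:  # keep first occurrence only
            tags.append(tag)
            seen.add(tag)
    n = len(tags)
    return [(tags[i], 1.0 - float(i) / n) for i in range(n)]

print(tagpos_votes('dog beach dog sunset'))
# -> [('dog', 1.0), ('beach', 0.666...), ('sunset', 0.333...)]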
def process(options, testCollection, trainCollection, feature):
    rootpath = options.rootpath
    overwrite = options.overwrite
    tpp = options.tpp
    doRandomwalk = 1  # options.doRandomwalk
    uniqueUser = 0    # options.uniqueUser
    k = 1000          # options.k
    numjobs = options.numjobs
    job = options.job

    # resultfile = os.path.join(rootpath, testCollection, "tagrel", testCollection, trainCollection,
    #                           "%s,tagrank%d%d,%d,%s" % (feature, doRandomwalk, uniqueUser, k, tpp), "id.tagvotes.txt")
    resultfile = os.path.join(rootpath, testCollection, "tagrel", testCollection, trainCollection,
                              '%s,tagrank,%s' % (feature, tpp), 'id.tagvotes.txt')
    if numjobs > 1:
        resultfile = resultfile + '.%d.%d' % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        sys.exit(0)

    # Skip images already recorded in an optional donefile; its last line is
    # dropped because it may be incomplete.
    try:
        doneset = set([str.split(x)[0] for x in open(options.donefile).readlines()[:-1]])
    except:
        doneset = set()
    printStatus(INFO, "done set: %d" % len(doneset))

    testImageSet = readImageSet(testCollection, testCollection, rootpath)
    testImageSet = [x for x in testImageSet if x not in doneset]
    # Round-robin partition of the test images across parallel jobs (1-based job index).
    testImageSet = [testImageSet[i] for i in range(len(testImageSet)) if (i % numjobs + 1) == job]
    printStatus(INFO, 'working on %d-%d, %d test images -> %s' % (numjobs, job, len(testImageSet), resultfile))

    testreader = TagReader(testCollection, rootpath=rootpath)
    test_feat_file = BigFile(os.path.join(rootpath, testCollection, 'FeatureData', feature))
    block_size = 100

    tagranking = TagRanking(trainCollection, feature=feature, k=k, rootpath=rootpath)

    makedirsforfile(resultfile)
    fw = open(resultfile, "w")

    done = 0
    # Process the test images block by block to bound memory usage.
    nr_of_blocks = len(testImageSet) // block_size
    if nr_of_blocks * block_size < len(testImageSet):
        nr_of_blocks += 1

    for block_index in range(nr_of_blocks):
        start = block_index * block_size
        end = min(len(testImageSet), start + block_size)
        subset = testImageSet[start:end]
        if not subset:
            break

        renamed, features = test_feat_file.read(subset)
        printStatus(INFO, '%d - %d: %d images' % (start, end, len(subset)))

        output = []
        for i in range(len(renamed)):
            qry_id = renamed[i]
            qry_tags = testreader.get(qry_id)
            qry_vec = features[i]
            tagvotes = tagranking.estimate(qry_vec, qry_tags)  # , uniqueUser=uniqueUser, doRandomwalk=doRandomwalk)
            newline = "%s %s" % (qry_id, " ".join(["%s %g" % (x[0], x[1]) for x in tagvotes]))
            output.append(newline + "\n")
            done += 1
            # printStatus(INFO, '%d %s %s' % (done, qry_id, ' '.join(['%s:%g' % (x[0], x[1]) for x in tagvotes[:3]])))

        fw.write("".join(output))
        fw.flush()

    fw.close()
    printStatus(INFO, 'done')