def process(options, testCollection, trainCollection, annotationName, feature, outputpkl):
    """Train a TagProp model if needed, predict tags and dump the scores as a pickle.

    The learning and prediction steps are expressed as MATLAB script text that
    is handed to ``call_matlab``; this function only assembles paths, checks
    that the required inputs exist and converts the prediction file to a pickle.

    Args:
        options: object providing ``rootpath``, ``k``, ``distance``,
            ``variant``, ``overwrite`` and ``trainmodel``.
        testCollection: collection to predict on (also used as the test set name).
        trainCollection: collection whose tag matrix, neighbours and model are used.
        annotationName: annotation name, used in paths and for ``readConcepts``.
        feature: feature name, used in the neighbour/model/prediction paths.
        outputpkl: path of the pickle file to write.

    Returns:
        0 if ``checkToSkip`` says either output file can be skipped, otherwise
        ``None`` once the pickle has been written.

    Calls ``sys.exit(1)`` when a required input file does not exist.
    """
    rootpath = options.rootpath
    k = options.k
    variant = options.variant
    overwrite = options.overwrite
    forcetrainmodel = options.trainmodel
    testset = testCollection
    modelName = "tagprop"
    nnName = options.distance + "knn"
    uses_distances = variant in ('dist', 'distsigmoids')

    printStatus(INFO, "Starting TagProp %s,%s,%s,%s,%s"
                % (variant, trainCollection, testCollection, annotationName, feature))

    # Path components shared by several of the files below.
    nn_config = '%s,%s,%d' % (feature, nnName, k)
    model_config = '%s,%s,%s,%d' % (feature, nnName, variant, k)

    resultfile = os.path.join(outputpkl)
    resultfile_tagprop = os.path.join(rootpath, testCollection, 'TagProp-Prediction', testset,
                                      trainCollection, annotationName, modelName,
                                      model_config, 'prediction.mat')

    if checkToSkip(resultfile, overwrite) or checkToSkip(resultfile_tagprop, overwrite):
        return 0

    tagmatrix_file = os.path.join(rootpath, trainCollection, 'TextData',
                                  'lemm_wordnet_freq_tags.h5')
    if not os.path.exists(tagmatrix_file):
        printStatus(INFO, "Tag matrix file not found at %s Did you run wordnet_frequency_tags.py?"
                    % (tagmatrix_file))
        sys.exit(1)

    train_neighs_file = os.path.join(rootpath, trainCollection, 'TagProp-data', trainCollection,
                                     nn_config, 'nn_train.h5')
    if not os.path.exists(train_neighs_file):
        printStatus(INFO, "Matlab train neighbors file not found at %s Did you run prepare_tagprop_data.py?"
                    % (train_neighs_file))
        sys.exit(1)

    # do we need to perform learning?
    train_model_file = os.path.join(rootpath, trainCollection, 'TagProp-models',
                                    model_config, 'model.mat')
    if os.path.exists(train_model_file) and not forcetrainmodel:
        printStatus(INFO, "model for %s available at %s" % (trainCollection, train_model_file))
    else:
        printStatus(INFO, "starting learning model for %s" % (trainCollection))
        makedirsforfile(train_model_file)

        script = (
            " tagprop_path = 'model_based/tagprop/TagProp/';"
            " addpath(tagprop_path);"
            " tagmatrix = h5read('%s', '/tagmatrix') > 0.5;"
            " tagmatrix = sparse(tagmatrix);"
            " NN = h5read('%s', '/NN');"
            " NN = NN(2:end, :);"
            " NN = double(NN); "
        ) % (tagmatrix_file, train_neighs_file)

        if uses_distances:
            script += (
                " NND = h5read('%s', '/NND');"
                " NND = NND(2:end, :);"
                " NND = reshape(NND, 1, size(NND,1), size(NND,2));"
                " NND = double(NND); "
            ) % train_neighs_file

        # One learning call per supported variant; an unrecognised variant
        # contributes nothing to the script, exactly as before.
        learn_calls = {
            'rank': " m = tagprop_learn(NN,[],tagmatrix); ",
            'ranksigmoids': " m = tagprop_learn(NN,[],tagmatrix,'sigmoids',true); ",
            'dist': " m = tagprop_learn(NN,NND,tagmatrix,'type','dist'); ",
            'distsigmoids': " m = tagprop_learn(NN,NND,tagmatrix,'type','dist','sigmoids',true); ",
        }
        script += learn_calls.get(variant, "")

        script += " save('%s', 'm', '-v7.3'); exit; " % train_model_file
        call_matlab(script)

    # we perform prediction
    printStatus(INFO, "starting prediction")
    test_neighs_file = os.path.join(rootpath, testCollection, 'TagProp-data', testset,
                                    trainCollection, annotationName, nn_config, 'nn_test.h5')
    if not os.path.exists(test_neighs_file):
        printStatus(INFO, "Matlab test neighbors file not found at %s Did you run prepare_tagprop_data.py?"
                    % (test_neighs_file))
        sys.exit(1)

    script = (
        " tagprop_path = 'model_based/tagprop/TagProp/';"
        " addpath(tagprop_path);"
        " load('%s');"
        " tagmatrix = h5read('%s', '/tagmatrix') > 0.5;"
        " tagmatrix = sparse(tagmatrix);"
        " NNT = h5read('%s', '/NNT');"
        " NNT = double(NNT); "
    ) % (train_model_file, tagmatrix_file, test_neighs_file)

    if uses_distances:
        script += (
            " NNDT = h5read('%s', '/NNDT');"
            " NNDT = reshape(NNDT, 1, size(NNDT,1), size(NNDT,2));"
            " NNDT = double(NNDT); "
        ) % test_neighs_file

    # NOTE(review): NNDT is loaded for the distance variants, but the call
    # below still passes [] as the second argument -- confirm against the
    # TagProp MATLAB API whether NNDT should be passed instead.
    script += " P = tagprop_predict(NNT,[],m)'; save('%s', '-v7.3'); exit; " % resultfile_tagprop

    makedirsforfile(resultfile_tagprop)
    call_matlab(script)

    # save results in pkl format
    printStatus(INFO, "Dump results in pkl format at %s" % resultfile)
    concepts = readConcepts(testCollection, annotationName, rootpath)
    id_images = readImageSet(testCollection, testset, rootpath)
    id_images.sort()

    # concepts mapping; the HDF5 files are closed as soon as the arrays are read
    with h5py.File(resultfile_tagprop, 'r') as tagprop_output:
        with h5py.File(tagmatrix_file, 'r') as tagprop_input:
            mapping = getVocabMap(list(tagprop_input['vocab'][:]), concepts)
            final_tagmatrix = tagprop_output['P'][:][:, mapping]

    # HIGHEST_PROTOCOL is a binary pickle protocol, so the file must be
    # opened in binary mode.
    with open(resultfile, 'wb') as f:
        pickle.dump({'concepts': concepts, 'id_images': id_images, 'scores': final_tagmatrix},
                    f, pickle.HIGHEST_PROTOCOL)
def process(options, testCollection, trainCollection, annotationName, feature, outputpkl):
    """Learn a TagProp model and predict tags in one MATLAB call, then dump a pickle.

    In this version the skip checks and the model-reuse check are disabled:
    the model is always re-learned, and the learning and prediction script
    text are concatenated and passed to ``call_matlab`` once.
    ``options.overwrite`` and ``options.trainmodel`` are therefore not consulted.

    Args:
        options: object providing ``rootpath``, ``k``, ``distance`` and ``variant``.
        testCollection: collection to predict on (also used as the test set name).
        trainCollection: collection whose tag matrix and neighbours are used.
        annotationName: annotation name, used in paths and for ``readConcepts``.
        feature: feature name, used in the neighbour/model/prediction paths.
        outputpkl: path of the pickle file to write.

    Returns:
        ``None``.

    Calls ``sys.exit(1)`` when a required input file does not exist.
    """
    rootpath = options.rootpath
    k = options.k
    variant = options.variant
    testset = testCollection
    modelName = "tagprop"
    nnName = options.distance + "knn"
    uses_distances = variant in ('dist', 'distsigmoids')

    printStatus(INFO, "Starting TagProp %s,%s,%s,%s,%s"
                % (variant, trainCollection, testCollection, annotationName, feature))

    # Path components shared by several of the files below.
    nn_config = '%s,%s,%d' % (feature, nnName, k)
    model_config = '%s,%s,%s,%d' % (feature, nnName, variant, k)

    resultfile = os.path.join(outputpkl)
    resultfile_tagprop = os.path.join(rootpath, testCollection, 'TagProp-Prediction', testset,
                                      trainCollection, annotationName, modelName,
                                      model_config, 'prediction.mat')

    tagmatrix_file = os.path.join(rootpath, trainCollection, 'TextData',
                                  'lemm_wordnet_freq_tags.h5')
    if not os.path.exists(tagmatrix_file):
        printStatus(INFO, "Tag matrix file not found at %s Did you run wordnet_frequency_tags.py?"
                    % (tagmatrix_file))
        sys.exit(1)

    train_neighs_file = os.path.join(rootpath, trainCollection, 'TagProp-data', trainCollection,
                                     nn_config, 'nn_train.h5')
    if not os.path.exists(train_neighs_file):
        printStatus(INFO, "Matlab train neighbors file not found at %s Did you run prepare_tagprop_data.py?"
                    % (train_neighs_file))
        sys.exit(1)

    # Learning is unconditional here; the script text is extended below and
    # executed together with the prediction part.
    train_model_file = os.path.join(rootpath, trainCollection, 'TagProp-models',
                                    model_config, 'model.mat')
    printStatus(INFO, "starting learning model for %s" % (trainCollection))
    makedirsforfile(train_model_file)

    # survey_code is defined outside this function and prefixes the TagProp path.
    script = (
        " tagprop_path = '%s/model_based/tagprop/TagProp/';"
        " addpath(tagprop_path);"
        " tagmatrix = h5read('%s', '/tagmatrix') > 0.5;"
        " tagmatrix = sparse(tagmatrix);"
        " NN = h5read('%s', '/NN');"
        " NN = NN(2:end, :);"
        " NN = double(NN); "
    ) % (survey_code, tagmatrix_file, train_neighs_file)

    if uses_distances:
        script += (
            " NND = h5read('%s', '/NND');"
            " NND = NND(2:end, :);"
            " NND = reshape(NND, 1, size(NND,1), size(NND,2));"
            " NND = double(NND); "
        ) % train_neighs_file

    # One learning call per supported variant; an unrecognised variant
    # contributes nothing to the script, exactly as before.
    learn_calls = {
        'rank': " m = tagprop_learn(NN,[],tagmatrix); ",
        'ranksigmoids': " m = tagprop_learn(NN,[],tagmatrix,'sigmoids',true); ",
        'dist': " m = tagprop_learn(NN,NND,tagmatrix,'type','dist'); ",
        'distsigmoids': " m = tagprop_learn(NN,NND,tagmatrix,'type','dist','sigmoids',true); ",
    }
    script += learn_calls.get(variant, "")

    # No 'exit;' here: the prediction statements are appended to the same script.
    script += " save('%s', 'm', '-v7.3'); " % train_model_file

    # we perform prediction
    printStatus(INFO, "starting prediction")
    test_neighs_file = os.path.join(rootpath, testCollection, 'TagProp-data', testset,
                                    trainCollection, annotationName, nn_config, 'nn_test.h5')
    if not os.path.exists(test_neighs_file):
        printStatus(INFO, "Matlab test neighbors file not found at %s Did you run prepare_tagprop_data.py?"
                    % (test_neighs_file))
        sys.exit(1)

    script += (
        " tagprop_path = '%s/model_based/tagprop/TagProp/';"
        " addpath(tagprop_path);"
        " load('%s');"
        " tagmatrix = h5read('%s', '/tagmatrix') > 0.5;"
        " tagmatrix = sparse(tagmatrix);"
        " NNT = h5read('%s', '/NNT');"
        " NNT = double(NNT); "
    ) % (survey_code, train_model_file, tagmatrix_file, test_neighs_file)

    if uses_distances:
        script += (
            " NNDT = h5read('%s', '/NNDT');"
            " NNDT = reshape(NNDT, 1, size(NNDT,1), size(NNDT,2));"
            " NNDT = double(NNDT); "
        ) % test_neighs_file

    # NOTE(review): NNDT is loaded for the distance variants, but the call
    # below still passes [] as the second argument -- confirm against the
    # TagProp MATLAB API whether NNDT should be passed instead.
    script += " P = tagprop_predict(NNT,[],m)'; save('%s', '-v7.3'); exit; " % resultfile_tagprop

    makedirsforfile(resultfile_tagprop)
    call_matlab(script)

    # save results in pkl format
    printStatus(INFO, "Dump results in pkl format at %s" % resultfile)
    concepts = readConcepts(testCollection, annotationName, rootpath)
    id_images = readImageSet(testCollection, testset, rootpath)
    id_images.sort()

    # concepts mapping; the HDF5 files are closed as soon as the arrays are read
    with h5py.File(resultfile_tagprop, 'r') as tagprop_output:
        with h5py.File(tagmatrix_file, 'r') as tagprop_input:
            mapping = getVocabMap(list(tagprop_input['vocab'][:]), concepts)
            final_tagmatrix = tagprop_output['P'][:][:, mapping]

    # HIGHEST_PROTOCOL is a binary pickle protocol, so the file must be
    # opened in binary mode.
    with open(resultfile, 'wb') as f:
        pickle.dump({'concepts': concepts, 'id_images': id_images, 'scores': final_tagmatrix},
                    f, pickle.HIGHEST_PROTOCOL)
def process(options, workingCollection, annotationName, feature, outputpkl):
    """Run RobustPCA through ``call_matlab`` (unless cached) and dump the scores as a pickle.

    Args:
        options: object providing ``rootpath``, ``distance``, ``overwrite``,
            ``kratio``, ``ratiocs``, ``lambda1``, ``lambda2``,
            ``outputonlytest`` and ``rawtagmatrix``.
        workingCollection: collection name. When ``options.outputonlytest`` is
            set, the part after the first ``'+'`` is used as the test collection.
        annotationName: annotation name passed to ``readConcepts``.
        feature: feature name used in the input/output paths.
        outputpkl: path of the pickle file to write.

    Returns:
        0 if ``checkToSkip`` says the pickle can be skipped, otherwise ``None``
        once the pickle has been written.

    Calls ``sys.exit(1)`` when a required input file does not exist.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    k_ratio = options.kratio
    ratio_cs = options.ratiocs
    lambda1 = options.lambda1
    lambda2 = options.lambda2
    outputonlytest = options.outputonlytest
    rawtagmatrix = options.rawtagmatrix
    nnName = options.distance + "knn"

    printStatus(INFO, "Starting RobustPCA %s,%s,%s,%s,%f,%f,%f"
                % (workingCollection, annotationName, feature, nnName, k_ratio, lambda1, lambda2))

    if rawtagmatrix:
        printStatus(INFO, "Using raw tag matrix.")
    else:
        printStatus(INFO, "Using preprocessed tag matrix.")

    resultfile = os.path.join(outputpkl)
    resultfile_robustpca = os.path.join(
        rootpath, workingCollection, 'RobustPCA-Prediction',
        '%s,%s,%f,%f,%f,%d' % (feature, nnName, lambda1, lambda2, k_ratio, rawtagmatrix),
        'prediction.mat')

    # When the MATLAB prediction can be skipped, only the pickle dump is (re)done.
    only_dump = bool(checkToSkip(resultfile_robustpca, overwrite))

    if not rawtagmatrix:
        tagmatrix_file = os.path.join(rootpath, workingCollection, 'RobustPCA',
                                      '%s,%s,%f' % (feature, nnName, DEFAULT_K_PROP),
                                      'tagmatrix.h5')
        if not os.path.exists(tagmatrix_file):
            printStatus(INFO, "Tag matrix file not found at %s Did you run robustpca_preprocessing.py?"
                        % (tagmatrix_file))
            sys.exit(1)
    else:
        tagmatrix_file = os.path.join(rootpath, workingCollection, 'TextData',
                                      "lemm_wordnet_freq_tags.h5")
        if not os.path.exists(tagmatrix_file):
            printStatus(INFO, 'Tag matrix file not found in %s Did you run wordnet_frequency_tags.py?'
                        % (tagmatrix_file))
            sys.exit(1)

    laplacianI_file = os.path.join(rootpath, workingCollection, 'LaplacianI', workingCollection,
                                   '%s,%s,%f' % (feature, nnName, k_ratio), 'laplacianI.mat')
    if not os.path.exists(laplacianI_file):
        printStatus(INFO, "LaplacianI file not found at %s Did you run laplacian_images.py?"
                    % (laplacianI_file))
        sys.exit(1)

    laplacianT_file = os.path.join(rootpath, workingCollection, 'LaplacianT',
                                   '%f' % (ratio_cs), 'laplacianT.mat')
    if not os.path.exists(laplacianT_file):
        printStatus(INFO, "LaplacianT file not found at %s Did you run laplacian_tags.py?"
                    % (laplacianT_file))
        sys.exit(1)

    # begin learning
    script = (
        " rpca_path = 'transduction_based/robustpca/';"
        " addpath(rpca_path);"
        " addpath([rpca_path, 'fast_svd/']);"
        " tagmatrix = sparse(double(h5read('%s', '/tagmatrix')));"
        " load('%s');"
        " load('%s');"
        " lambda1 = %f;"
        " lambda2 = %f;"
        " maxIters = 50;"
        " precision = 1e-4;"
        " mu_start = 1.;"
        " parpool('local', 4);"
        " [P,E]=robustpca(tagmatrix, lambda1, lambda2, tag_similarity, im_similarity,"
        " maxIters, precision, mu_start); "
    ) % (tagmatrix_file, laplacianI_file, laplacianT_file, lambda1, lambda2)

    script += (
        " delete(gcp);"
        " save('%s', 'P', 'E', 'lambda1', 'lambda2', '-v7.3');"
        " exit; "
    ) % resultfile_robustpca

    if not only_dump:
        printStatus(INFO, "starting learning")
        makedirsforfile(resultfile_robustpca)
        call_matlab(script)

    if checkToSkip(resultfile, overwrite):
        return 0

    # save results in pkl format
    printStatus(INFO, "Dump results in pkl format at %s" % resultfile)
    concepts = readConcepts(workingCollection, annotationName, rootpath)
    if outputonlytest:
        test_collection = workingCollection.split('+')[1]
        testset_id_images = readImageSet(test_collection, test_collection, rootpath)
        testset_id_images.sort()

    id_images = readImageSet(workingCollection, workingCollection, rootpath)
    id_images.sort()

    # concepts mapping; the HDF5 files are closed as soon as the arrays are read
    with h5py.File(resultfile_robustpca, 'r') as robustpca_output:
        with h5py.File(tagmatrix_file, 'r') as tagprop_input:
            mapping = getVocabMap(list(tagprop_input['vocab'][:]), concepts)
            predicted_tagmatrix = robustpca_output['P'][:, mapping]

    if outputonlytest:
        # Keep only the rows of the images that belong to the test collection.
        idx = np.array([bisect_index(id_images, x) for x in testset_id_images])
        final_tagmatrix = predicted_tagmatrix[idx, :]
        assert final_tagmatrix.shape[0] == idx.shape[0]
        id_images = testset_id_images
    else:
        final_tagmatrix = predicted_tagmatrix

    makedirsforfile(resultfile)
    # HIGHEST_PROTOCOL is a binary pickle protocol, so the file must be
    # opened in binary mode.
    with open(resultfile, 'wb') as f:
        pickle.dump({'concepts': concepts, 'id_images': id_images, 'scores': final_tagmatrix},
                    f, pickle.HIGHEST_PROTOCOL)
def process(options, workingCollection, annotationName, feature, outputpkl):
    """Run RobustPCA through ``call_matlab`` (unless cached) and dump the scores as a pickle.

    Args:
        options: object providing ``rootpath``, ``distance``, ``overwrite``,
            ``kratio``, ``ratiocs``, ``lambda1``, ``lambda2``,
            ``outputonlytest`` and ``rawtagmatrix``.
        workingCollection: collection name. When ``options.outputonlytest`` is
            set, the part after the first ``'+'`` is used as the test collection.
        annotationName: annotation name passed to ``readConcepts``.
        feature: feature name used in the input/output paths.
        outputpkl: path of the pickle file to write.

    Returns:
        0 if ``checkToSkip`` says the pickle can be skipped, otherwise ``None``
        once the pickle has been written.

    Calls ``sys.exit(1)`` when a required input file does not exist.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    k_ratio = options.kratio
    ratio_cs = options.ratiocs
    lambda1 = options.lambda1
    lambda2 = options.lambda2
    outputonlytest = options.outputonlytest
    rawtagmatrix = options.rawtagmatrix
    nnName = options.distance + "knn"

    printStatus(
        INFO, "Starting RobustPCA %s,%s,%s,%s,%f,%f,%f" %
        (workingCollection, annotationName, feature, nnName, k_ratio, lambda1, lambda2))

    if rawtagmatrix:
        printStatus(INFO, "Using raw tag matrix.")
    else:
        printStatus(INFO, "Using preprocessed tag matrix.")

    resultfile = os.path.join(outputpkl)
    resultfile_robustpca = os.path.join(
        rootpath, workingCollection, 'RobustPCA-Prediction',
        '%s,%s,%f,%f,%f,%d' % (feature, nnName, lambda1, lambda2, k_ratio, rawtagmatrix),
        'prediction.mat')

    # When the MATLAB prediction can be skipped, only the pickle dump is (re)done.
    only_dump = bool(checkToSkip(resultfile_robustpca, overwrite))

    if not rawtagmatrix:
        tagmatrix_file = os.path.join(
            rootpath, workingCollection, 'RobustPCA',
            '%s,%s,%f' % (feature, nnName, DEFAULT_K_PROP), 'tagmatrix.h5')
        if not os.path.exists(tagmatrix_file):
            printStatus(
                INFO,
                "Tag matrix file not found at %s Did you run robustpca_preprocessing.py?"
                % (tagmatrix_file))
            sys.exit(1)
    else:
        tagmatrix_file = os.path.join(rootpath, workingCollection, 'TextData',
                                      "lemm_wordnet_freq_tags.h5")
        if not os.path.exists(tagmatrix_file):
            printStatus(
                INFO,
                'Tag matrix file not found in %s Did you run wordnet_frequency_tags.py?'
                % (tagmatrix_file))
            sys.exit(1)

    laplacianI_file = os.path.join(rootpath, workingCollection, 'LaplacianI',
                                   workingCollection,
                                   '%s,%s,%f' % (feature, nnName, k_ratio),
                                   'laplacianI.mat')
    if not os.path.exists(laplacianI_file):
        printStatus(
            INFO,
            "LaplacianI file not found at %s Did you run laplacian_images.py?"
            % (laplacianI_file))
        sys.exit(1)

    laplacianT_file = os.path.join(rootpath, workingCollection, 'LaplacianT',
                                   '%f' % (ratio_cs), 'laplacianT.mat')
    if not os.path.exists(laplacianT_file):
        printStatus(
            INFO,
            "LaplacianT file not found at %s Did you run laplacian_tags.py?"
            % (laplacianT_file))
        sys.exit(1)

    # begin learning
    script = (
        " rpca_path = 'transduction_based/robustpca/';"
        " addpath(rpca_path);"
        " addpath([rpca_path, 'fast_svd/']);"
        " tagmatrix = sparse(double(h5read('%s', '/tagmatrix')));"
        " load('%s');"
        " load('%s');"
        " lambda1 = %f;"
        " lambda2 = %f;"
        " maxIters = 50;"
        " precision = 1e-4;"
        " mu_start = 1.;"
        " parpool('local', 4);"
        " [P,E]=robustpca(tagmatrix, lambda1, lambda2, tag_similarity, im_similarity,"
        " maxIters, precision, mu_start); "
    ) % (tagmatrix_file, laplacianI_file, laplacianT_file, lambda1, lambda2)

    script += (
        " delete(gcp);"
        " save('%s', 'P', 'E', 'lambda1', 'lambda2', '-v7.3');"
        " exit; "
    ) % resultfile_robustpca

    if not only_dump:
        printStatus(INFO, "starting learning")
        makedirsforfile(resultfile_robustpca)
        call_matlab(script)

    if checkToSkip(resultfile, overwrite):
        return 0

    # save results in pkl format
    printStatus(INFO, "Dump results in pkl format at %s" % resultfile)
    concepts = readConcepts(workingCollection, annotationName, rootpath)
    if outputonlytest:
        test_collection = workingCollection.split('+')[1]
        testset_id_images = readImageSet(test_collection, test_collection, rootpath)
        testset_id_images.sort()

    id_images = readImageSet(workingCollection, workingCollection, rootpath)
    id_images.sort()

    # concepts mapping; the HDF5 files are closed as soon as the arrays are read
    with h5py.File(resultfile_robustpca, 'r') as robustpca_output:
        with h5py.File(tagmatrix_file, 'r') as tagprop_input:
            mapping = getVocabMap(list(tagprop_input['vocab'][:]), concepts)
            predicted_tagmatrix = robustpca_output['P'][:, mapping]

    if outputonlytest:
        # Keep only the rows of the images that belong to the test collection.
        idx = np.array([bisect_index(id_images, x) for x in testset_id_images])
        final_tagmatrix = predicted_tagmatrix[idx, :]
        assert final_tagmatrix.shape[0] == idx.shape[0]
        id_images = testset_id_images
    else:
        final_tagmatrix = predicted_tagmatrix

    makedirsforfile(resultfile)
    # HIGHEST_PROTOCOL is a binary pickle protocol, so the file must be
    # opened in binary mode.
    with open(resultfile, 'wb') as f:
        pickle.dump(
            {
                'concepts': concepts,
                'id_images': id_images,
                'scores': final_tagmatrix
            }, f, pickle.HIGHEST_PROTOCOL)
# Select the test-set rows of an already loaded tag matrix and dump them as a
# pickle. `tagmatrix_file` is expected to be defined before this point.
workingCollection = sys.argv[3]
annotationName = sys.argv[4]
rootpath = ROOT_PATH

id_images = tagmatrix_file['id_images']
concepts = readConcepts(workingCollection, annotationName, rootpath)

# The test collection is the part of the working collection after the first '+'.
test_collection = workingCollection.split('+')[1]
testset_id_images = readImageSet(test_collection, test_collection, rootpath)
testset_id_images.sort()

# Compare ids as plain strings. The exact type check is kept on purpose so that
# str subclasses are normalised as well, exactly as before.
if type(id_images[0]) is not str:
    id_images = [str(x) for x in id_images]
if type(testset_id_images[0]) is not str:
    testset_id_images = [str(x) for x in testset_id_images]

mapping = getVocabMap(list(tagmatrix_file['vocab'][:]), concepts)
predicted_tagmatrix = tagmatrix_file['tagmatrix'][:, mapping]

# Single-argument print(...) parses under both Python 2 and 3; the doubled
# space reproduces what the former `print "... = ", value` statements emitted.
print("predicted_tagmatrix.shape =  %s" % (predicted_tagmatrix.shape,))
print("len(id_images) =  %d" % len(id_images))
print("len(testset_id_images) =  %d" % len(testset_id_images))

idx = np.array([bisect_index(id_images, x) for x in testset_id_images])
final_tagmatrix = predicted_tagmatrix[idx, :]
id_images = testset_id_images

print("dumping %d elements..." % len(id_images))
# Open the output only once everything is ready, in binary mode (required by
# the binary HIGHEST_PROTOCOL), and close it when the dump is done.
with open(sys.argv[2], 'wb') as pkl_file:
    pickle.dump({'concepts': concepts,
                 'id_images': [int(x) for x in id_images],
                 'scores': final_tagmatrix},
                pkl_file, pickle.HIGHEST_PROTOCOL)