def main(params):
    """Build a training dictionary from all image-sentence pairs.

    Iterates over every image-sentence pair in the dataset, stems and
    lowercases each non-stopword token, and writes to
    ``training_dictionary_pert.txt`` every word that occurs at least
    5 times (one word per line).

    :param params: dict with key 'dataset' naming the dataset to load.
    """
    dataset = params['dataset']
    # The data provider must be created from the project root directory.
    os.chdir("..")
    dataprovider = getDataProvider(dataset, pert=1)
    os.chdir("cca")
    stopwords = getStopwords()
    # Renamed from `dict` -- the original shadowed the builtin type.
    counts = {}
    for pair in dataprovider.iterImageSentencePair():
        sentence = remove_common_words(pair['sentence']['tokens'], stopwords)
        for word in sentence:
            word = stem(word.decode('utf-8')).lower()
            if word not in stopwords:
                counts[word] = counts.get(word, 0) + 1
    # Keep only words seen at least 5 times.
    result = {}
    for word in counts:
        if counts[word] >= 5:
            result[word] = counts[word]
    # Bug fix: the file handle was previously never closed; `with`
    # guarantees the buffer is flushed to disk even on error.
    with open("training_dictionary_pert.txt", "w+") as f:
        for w in result.keys():
            f.write(w + '\n')
    print('finished')
def cmd_predict_v(dataset='coco', datapath='.', model_path='.', model_name='model.pkl.gz', batch_size=128, output_v='predict_v.npy', output_r='predict_r.npy'): M = load(model_path, model_name=model_name) model = M['model'] batcher = M['batcher'] mapper = M['batcher'].mapper predict_v = predictor_v(model) predict_r = predictor_r(model) prov = dp.getDataProvider(dataset, root=datapath) sents = list(prov.iterSentences(split='val')) inputs = list( mapper.transform( [tokens(sent, tokenizer=batcher.tokenizer) for sent in sents])) print len(model.network.params()) preds_v = numpy.vstack([ predict_v(batcher.batch_inp(batch)) for batch in grouper(inputs, batch_size) ]) numpy.save(os.path.join(model_path, output_v), preds_v) preds_r = numpy.vstack([ predict_r(batcher.batch_inp(batch)) for batch in grouper(inputs, batch_size) ]) numpy.save(os.path.join(model_path, output_r), preds_r)
def cmd_eval(dataset='coco', datapath='.', scaler_path='scaler.pkl.gz', input_v='predict_v.npy', input_r='predict_r.npy', output='eval.json'):
    """Evaluate saved validation predictions with ranking metrics.

    Loads the visual/reconstruction prediction matrices produced by
    cmd_predict_v, computes ranking scores against image features and
    paraphrase sentences, dumps them to a JSON file, and prints
    median rank and recall@{1,5,10} for each mode.
    """
    scaler = pickle.load(gzip.open(scaler_path))
    preds_v = numpy.load(input_v)
    preds_r = numpy.load(input_r)
    prov = dp.getDataProvider(dataset, root=datapath)
    sents = list(prov.iterSentences(split='val'))
    images = list(prov.iterImages(split='val'))
    # Scale image features with the same scaler used during training.
    img_fs = list(scaler.transform([image['feat'] for image in images]))
    # correct_img[i][j]: sentence i describes image j.
    correct_img = numpy.array([[sents[i]['imgid'] == images[j]['imgid'] for j in range(len(images))] for i in range(len(sents))])
    # correct_para[i][j]: sentences i and j describe the same image.
    correct_para = numpy.array([[sents[i]['imgid'] == sents[j]['imgid'] for j in range(len(sents))] for i in range(len(sents))])
    r_img = evaluate.ranking(img_fs, preds_v, correct_img, ns=(1, 5, 10), exclude_self=False)
    r_para_v = evaluate.ranking(preds_v, preds_v, correct_para, ns=(1, 5, 10), exclude_self=True)
    r_para_r = evaluate.ranking(preds_r, preds_r, correct_para, ns=(1, 5, 10), exclude_self=True)
    r = {'img': r_img, 'para_v': r_para_v, 'para_r': r_para_r}
    json.dump(r, open(output, 'w'))
    for mode in ['img', 'para_v', 'para_r']:
        print '{} median_rank'.format(mode), numpy.median(r[mode]['ranks'])
        for n in (1, 5, 10):
            print '{} recall@{}'.format(mode, n), numpy.mean(r[mode]['recall'][n])
        # NOTE(review): flush placement reconstructed from collapsed
        # source -- assumed once per mode; confirm against original.
        sys.stdout.flush()
def cmd_train_resume(dataset='coco', extra_train=False, datapath='.', model_path='.', model_name='model.pkl.gz', seed=None, shuffle=False, with_para='auto', start_epoch=1, epochs=1, batch_size=64, validate_period=64 * 100, logfile='log.txt'):
    """Resume training of a previously saved model.

    Reloads the pickled batcher, scaler, and model from model_path and
    continues training from start_epoch via do_training.
    """
    def load(f):
        # Read a gzipped pickle relative to the model directory.
        return pickle.load(gzip.open(os.path.join(model_path, f)))
    # Deep model graphs exceed the default recursion limit when pickled.
    sys.setrecursionlimit(50000)
    if seed is not None:
        random.seed(seed)
        numpy.random.seed(seed)
    prov = dp.getDataProvider(dataset, root=datapath, extra_train=extra_train)
    batcher, scaler, model = map(
        load, ['batcher.pkl.gz', 'scaler.pkl.gz', model_name])
    # fit=False: the mapper/scaler were already fitted during training.
    data = Data(prov, batcher.mapper, scaler, batch_size=batch_size,
                with_para=with_para, shuffle=shuffle, fit=False)
    do_training(logfile, epochs, start_epoch, batch_size, validate_period,
                model_path, model, data)
def main():
    """Report mean and standard deviation of training sentence lengths.

    Iterates over all image-sentence pairs of the flickr30k training
    set and prints the (population) mean and standard deviation of the
    token counts.
    """
    dataset = 'flickr30k'
    # The data provider must be created from the project root directory.
    os.chdir("..")
    dataprovider = getDataProvider(dataset)
    os.chdir("imagernn")
    # Collect all lengths in one pass instead of re-creating the
    # generator for the second (variance) pass as before.
    lengths = [len(pair['sentence']['tokens'])
               for pair in dataprovider.iterImageSentencePair()]
    if not lengths:
        # Bug fix: the original divided by zero on an empty dataset.
        print('no sentences found')
        return
    nb_of_sentences = float(len(lengths))
    mean = sum(lengths) / nb_of_sentences
    # Population standard deviation (divides by N, matching original).
    dev = math.sqrt(sum((mean - l) ** 2 for l in lengths) / nb_of_sentences)
    print('mean: ', mean)
    print('std.dev: ', dev)
def cmd_train( dataset='coco', extra_train=False, datapath='.', model_path='.', hidden_size=1024, gru_activation=clipped_rectify, visual_activation=linear, visual_encoder=StackedGRUH0, max_norm=None, lr=0.0002, embedding_size=None, depth=1, grow_depth=None, grow_params_path=None, scaler=None, cost_visual=CosineDistance, seed=None, shuffle=False, reverse=False, with_para='auto', tokenizer='word', architecture=MultitaskLM, dropout_prob=0.0, alpha=0.1, epochs=1, batch_size=64, pad_end=False, validate_period=64*100, logfile='log.txt'): sys.setrecursionlimit(50000) # needed for pickling models if seed is not None: random.seed(seed) numpy.random.seed(seed) prov = dp.getDataProvider(dataset, root=datapath, extra_train=extra_train) mapper = util.IdMapper(min_df=10) embedding_size = embedding_size if embedding_size is not None else hidden_size scaler = StandardScaler() if scaler == 'standard' else NoScaler() data = Data(prov, mapper, scaler, batch_size=batch_size, with_para=with_para, shuffle=shuffle, reverse=reverse, tokenizer=tokenizer) data.dump(model_path) model = Imaginet(size_vocab=mapper.size(), size_embed=embedding_size, size=hidden_size, size_out=4096, depth=depth, network=architecture, cost_visual=cost_visual, alpha=alpha, gru_activation=gru_activation, visual_activation=visual_activation, visual_encoder=visual_encoder, max_norm=max_norm, lr=lr, dropout_prob=dropout_prob) start_epoch=1 grow_depth = depth if grow_depth is None else grow_depth do_training(logfile, epochs, start_epoch, batch_size, validate_period, model_path, model, data, grow_depth, grow_params_path)
def cmd_eval(dataset='coco', datapath='.', scaler_path='scaler.pkl.gz', input_v='predict_v.npy', input_r='predict_r.npy', output='eval.json'): scaler = pickle.load(gzip.open(scaler_path)) preds_v = numpy.load(input_v) preds_r = numpy.load(input_r) prov = dp.getDataProvider(dataset, root=datapath) sents = list(prov.iterSentences(split='val')) images = list(prov.iterImages(split='val')) img_fs = list(scaler.transform([image['feat'] for image in images])) correct_img = numpy.array( [[sents[i]['imgid'] == images[j]['imgid'] for j in range(len(images))] for i in range(len(sents))]) correct_para = numpy.array( [[sents[i]['imgid'] == sents[j]['imgid'] for j in range(len(sents))] for i in range(len(sents))]) r_img = evaluate.ranking(img_fs, preds_v, correct_img, ns=(1, 5, 10), exclude_self=False) r_para_v = evaluate.ranking(preds_v, preds_v, correct_para, ns=(1, 5, 10), exclude_self=True) r_para_r = evaluate.ranking(preds_r, preds_r, correct_para, ns=(1, 5, 10), exclude_self=True) r = {'img': r_img, 'para_v': r_para_v, 'para_r': r_para_r} json.dump(r, open(output, 'w')) for mode in ['img', 'para_v', 'para_r']: print '{} median_rank'.format(mode), numpy.median(r[mode]['ranks']) for n in (1, 5, 10): print '{} recall@{}'.format(mode, n), numpy.mean(r[mode]['recall'][n]) sys.stdout.flush()
def cmd_predict_v(dataset='coco', datapath='.', model_path='.', model_name='model.pkl.gz', batch_size=128, output_v='predict_v.npy', output_r='predict_r.npy'):
    """Predict visual and reconstruction outputs for validation sentences.

    Loads the saved model bundle, maps the validation sentences through
    the batcher's tokenizer/mapper, and saves the stacked prediction
    matrices as .npy files under model_path.
    """
    M = load(model_path, model_name=model_name)
    model = M['model']
    batcher = M['batcher']
    mapper = M['batcher'].mapper
    predict_v = predictor_v(model)
    predict_r = predictor_r(model)
    prov = dp.getDataProvider(dataset, root=datapath)
    sents = list(prov.iterSentences(split='val'))
    inputs = list(mapper.transform(
        [tokens(sent, tokenizer=batcher.tokenizer) for sent in sents]))
    # Debug output: number of trainable parameters in the network.
    print len(model.network.params())
    preds_v = numpy.vstack([
        predict_v(batcher.batch_inp(batch))
        for batch in grouper(inputs, batch_size)
    ])
    numpy.save(os.path.join(model_path, output_v), preds_v)
    preds_r = numpy.vstack([
        predict_r(batcher.batch_inp(batch))
        for batch in grouper(inputs, batch_size)
    ])
    numpy.save(os.path.join(model_path, output_r), preds_r)
def cmd_train_resume(dataset='coco', extra_train=False, datapath='.', model_path='.', model_name='model.pkl.gz', seed=None, shuffle=False, with_para='auto', start_epoch=1, epochs=1, batch_size=64, validate_period=64*100, logfile='log.txt'):
    """Continue training a saved model from start_epoch.

    Restores the pickled batcher, scaler, and model from model_path,
    rebuilds the data pipeline without refitting, and runs do_training.
    """
    # Deep model graphs exceed the default recursion limit when pickled.
    sys.setrecursionlimit(50000)
    if seed is not None:
        random.seed(seed)
        numpy.random.seed(seed)

    def unpickle(name):
        # Read a gzipped pickle relative to the model directory.
        return pickle.load(gzip.open(os.path.join(model_path, name)))

    prov = dp.getDataProvider(dataset, root=datapath,
                              extra_train=extra_train)
    batcher, scaler, model = [unpickle(name) for name in
                              ('batcher.pkl.gz', 'scaler.pkl.gz', model_name)]
    # fit=False: mapper and scaler were already fitted during training.
    data = Data(prov, batcher.mapper, scaler, batch_size=batch_size,
                with_para=with_para, shuffle=shuffle, fit=False)
    do_training(logfile, epochs, start_epoch, batch_size, validate_period,
                model_path, model, data)
def main(num_hidden=50, K=150, Type=1, isBinary=0, classifier='MLP', CNNfeat='softmax', L2=0.0005, C=1):
    """Train and evaluate a VQA baseline on COCO.

    Type selects the input features: 0=question, 1=question+image,
    2=caption, 3=image, 4=question+caption, 5=question+image+caption.
    K is the answer-vocabulary size; classifier is 'MLP' or 'SVM'.
    """
    # Type, a list indicate which we want to use.
    dataset = 'coco'
    # All hyper-parameters and derived data are threaded through `misc`.
    misc = {}
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 25000
    misc['classifier'] = classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2
    L1 = 0  # no L1 regularization
    dp = getDataProvider(dataset, misc['IsBinary'])
    print 'The K number is %d' % (misc['numAnswer'])
    # dp.downloadImage()
    # dp.loadCaption()
    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc[
        'Avocab'] = preProBuildAnswerVocab(dp.iterAnswer('train'),
                                           misc['numAnswer'])
    # Bag-of-words encodings for train/test answers and multiple-choice data.
    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'),
                                               misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'),
                                              misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(
        dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'),
                                                    misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))
    if Type == 0:
        print '===================================================='
        print 'Test on Question, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preQuestion(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            # 1030 = input dimensionality for question-only features.
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1030,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 1:
        print '===================================================='
        print 'Test on QuestionImage, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec, misc = preQuestionImage(dp, misc)
        # Persist the vocabularies so the model can be reused later.
        Vocab_save = {}
        Vocab_save['Awordtoix'] = misc['Awordtoix']
        Vocab_save['Aixtoword'] = misc['Aixtoword']
        Vocab_save['Avocab'] = misc['Avocab']
        Vocab_save['Qwordtoix0'] = misc['Qwordtoix0']
        Vocab_save['Qwordtoix1'] = misc['Qwordtoix1']
        Vocab_save['Qwordtoix2'] = misc['Qwordtoix2']
        Vocab_save['Qwordtoix3'] = misc['Qwordtoix3']
        utils.pickleSave('Vocab', Vocab_save)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 2030,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 2:
        print '===================================================='
        print 'Test on Caption, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preCaption(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1000,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 3:
        print '===================================================='
        print 'Test on Image, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preImage(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1000,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 4:
        print '===================================================='
        print 'Test on QuestionCaption, The K number is %d' % (
            misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preQuestionCaption(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 2030,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 5:
        print '===================================================='
        print 'Test on QuestionImageCaption, The K number is %d' % (
            misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preQuestionCaptionImg(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec,
                                misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 3030,
                             misc['numHidden'], 300, misc, L1, L2)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
        calAcc(out, misc)
def Prior_baseline(num_hidden=50, K=500, Type=0, isBinary=0, classifier='MLP', CNNfeat='fc7', L2=0.001, C=1):
    """Answer-prior baseline: predict the most frequent training answer
    for the longest matching question prefix.

    Sweeps a frequency threshold `th`, builds a prefix vocabulary of
    questions, maps each prefix to its majority training answer, and
    scores the resulting predictions on the test split.
    """
    # Type, a list indicate which we want to use.
    dataset = 'coco'
    misc = {}
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 20000
    misc['classifier'] = classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2
    dp = getDataProvider(dataset, misc['IsBinary'])
    print 'The K number is %d' % (misc['numAnswer'])
    # dp.downloadImage()
    # dp.loadCaption()
    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc[
        'Avocab'] = preProBuildAnswerVocab(dp.iterAnswer('train'),
                                           misc['numAnswer'])
    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'),
                                               misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'),
                                              misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(
        dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'),
                                                    misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))
    for misc['th'] in range(150, 400, 25):
        print 'th value is %d' % misc['th']
        answer_counts, vocab, ques_depth = preProBuildAnswerVocabTop(
            dp, misc['th'])
        idx = 0
        # Collect, per question prefix, the list of training answer ids.
        ans_counts = {}
        for sent in dp.iterQuestion('train'):
            string = ' '.join(sent[:ques_depth[idx]])
            if string in vocab:
                # misc['numAnswer'] acts as a "missing" sentinel here.
                if ans_counts.get(string, misc['numAnswer']) == misc['numAnswer']:
                    ans_counts[string] = [misc['bowAnswerTrain'][idx]]
                else:
                    ans_counts[string] = ans_counts.get(
                        string,
                        misc['numAnswer']) + [misc['bowAnswerTrain'][idx]]
            idx += 1
        # get the prior of the label
        ans_prior = {}
        for key, value in ans_counts.iteritems():
            tmp = {}
            for idx in value:
                tmp[idx] = tmp.get(idx, 0) + 1
            # Most frequent answer id for this prefix.
            tmp_idx = sorted(tmp, key=tmp.get, reverse=True)[0]
            ans_prior[key] = misc['Aixtoword'].get(tmp_idx)
        # Debug dump: prefixes and counts grouped by prefix depth.
        # NOTE(review): nesting below reconstructed from collapsed source.
        for i in range(6):
            print "Depth %d" % (i + 1)
            for key, value in sorted(ans_counts.iteritems(),
                                     key=lambda (k, v): (v, k)):
                if len(key.split(' ')) == i + 1:
                    print "%s: %s " % (key, len(value)),
            print ""
        for i in range(6):
            print "Depth %d" % (i + 1)
            for tmp in vocab:
                if len(tmp.split(' ')) == i + 1:
                    print "%s: %s " % (tmp, ans_prior.get(tmp)),
            print ""
        depth_count = {}
        for tmp in vocab:
            count = len(tmp.split(' '))
            depth_count[count] = depth_count.get(count, 0) + 1
        # Interactive breakpoint left in by the author.
        pdb.set_trace()
        max_depth = max(ques_depth)
        ans = []
        for sent in dp.iterQuestion('test'):
            # iter from big to small
            tmp = ''
            for depth in range(max_depth):
                string = ' '.join(sent[:ques_depth[max_depth - depth]])
                if string in vocab:
                    tmp = string
                    break
            if tmp != '':
                idx = misc['Awordtoix'].get(ans_prior.get(string))
                ans.append(idx)
            else:
                # No matching prefix: fall back to answer id 0.
                ans.append(0)
        utils.mlpOPlable(ans, misc['bowAnswerTest'], misc['answerGroup'],
                         misc['numAnswer'])
        openAnswerWriteJson(ans, dp.iterAll('test'), misc)
def cmd_train(dataset='coco', extra_train=False, datapath='.', model_path='.', hidden_size=1024, gru_activation=clipped_rectify, visual_activation=linear, visual_encoder=StackedGRUH0, max_norm=None, lr=0.0002, embedding_size=None, depth=1, grow_depth=None, grow_params_path=None, scaler=None, cost_visual=CosineDistance, seed=None, shuffle=False, reverse=False, with_para='auto', tokenizer='word', architecture=MultitaskLM, dropout_prob=0.0, alpha=0.1, epochs=1, batch_size=64, pad_end=False, validate_period=64 * 100, logfile='log.txt'): sys.setrecursionlimit(50000) # needed for pickling models if seed is not None: random.seed(seed) numpy.random.seed(seed) prov = dp.getDataProvider(dataset, root=datapath, extra_train=extra_train) mapper = util.IdMapper(min_df=10) embedding_size = embedding_size if embedding_size is not None else hidden_size scaler = StandardScaler() if scaler == 'standard' else NoScaler() data = Data(prov, mapper, scaler, batch_size=batch_size, with_para=with_para, shuffle=shuffle, reverse=reverse, tokenizer=tokenizer) data.dump(model_path) model = Imaginet(size_vocab=mapper.size(), size_embed=embedding_size, size=hidden_size, size_out=4096, depth=depth, network=architecture, cost_visual=cost_visual, alpha=alpha, gru_activation=gru_activation, visual_activation=visual_activation, visual_encoder=visual_encoder, max_norm=max_norm, lr=lr, dropout_prob=dropout_prob) start_epoch = 1 grow_depth = depth if grow_depth is None else grow_depth do_training(logfile, epochs, start_epoch, batch_size, validate_period, model_path, model, data, grow_depth, grow_params_path)
def main(num_hidden=50, K=150, Type=1, isBinary=0, classifier="MLP", CNNfeat="softmax", L2=0.0005, C=1):
    """Train and evaluate a VQA baseline on COCO.

    Type selects the input features: 0=question, 1=question+image,
    2=caption, 3=image, 4=question+caption, 5=question+image+caption.
    K is the answer-vocabulary size; classifier is "MLP" or "SVM".
    """
    # Type, a list indicate which we want to use.
    dataset = "coco"
    # All hyper-parameters and derived data are threaded through `misc`.
    misc = {}
    misc["C"] = C
    misc["Type"] = Type
    misc["IsBinary"] = isBinary
    misc["numAnswer"] = K
    misc["numHidden"] = num_hidden
    misc["vali_size"] = 25000
    misc["classifier"] = classifier
    misc["CNNfeat"] = CNNfeat
    misc["L2"] = L2
    L1 = 0  # no L1 regularization
    dp = getDataProvider(dataset, misc["IsBinary"])
    print "The K number is %d" % (misc["numAnswer"])
    # dp.downloadImage()
    # dp.loadCaption()
    # get the vocabulary for the answers.
    misc["Awordtoix"], misc["Aixtoword"], misc["Avocab"] = preProBuildAnswerVocab(
        dp.iterAnswer("train"), misc["numAnswer"]
    )
    # Bag-of-words encodings for train/test answers and multiple-choice data.
    misc["bowAnswerTrain"] = BoWAnswerEncoding(dp.iterAnswer("train"), misc["Awordtoix"])
    misc["bowAnswerTest"] = BoWAnswerEncoding(dp.iterAnswer("test"), misc["Awordtoix"])
    misc["multiAnswerTest"] = BOWMultiAnswerEncoding(dp.iterMultiAnswer("test"), misc["Awordtoix"])
    misc["genCaptionTest"] = BOWMultiAnswerEncoding(dp.iterGenCaption("test"), misc["Awordtoix"])
    misc["answerGroup"] = FindAnswerGroup(dp.iterImgIdQuestion("test"))
    if Type == 0:
        print "===================================================="
        print "Test on Question, The K number is %d" % (misc["numAnswer"])
        print "===================================================="
        trainVec, testVec = preQuestion(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            # 1030 = input dimensionality for question-only features.
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 1030,
                             misc["numHidden"], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 1:
        print "===================================================="
        print "Test on QuestionImage, The K number is %d" % (misc["numAnswer"])
        print "===================================================="
        trainVec, testVec, misc = preQuestionImage(dp, misc)
        # Persist the vocabularies so the model can be reused later.
        Vocab_save = {}
        Vocab_save["Awordtoix"] = misc["Awordtoix"]
        Vocab_save["Aixtoword"] = misc["Aixtoword"]
        Vocab_save["Avocab"] = misc["Avocab"]
        Vocab_save["Qwordtoix0"] = misc["Qwordtoix0"]
        Vocab_save["Qwordtoix1"] = misc["Qwordtoix1"]
        Vocab_save["Qwordtoix2"] = misc["Qwordtoix2"]
        Vocab_save["Qwordtoix3"] = misc["Qwordtoix3"]
        utils.pickleSave("Vocab", Vocab_save)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 2030,
                             misc["numHidden"], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 2:
        print "===================================================="
        print "Test on Caption, The K number is %d" % (misc["numAnswer"])
        print "===================================================="
        trainVec, testVec = preCaption(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 1000,
                             misc["numHidden"], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 3:
        print "===================================================="
        print "Test on Image, The K number is %d" % (misc["numAnswer"])
        print "===================================================="
        trainVec, testVec = preImage(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 1000,
                             misc["numHidden"], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 4:
        print "===================================================="
        print "Test on QuestionCaption, The K number is %d" % (misc["numAnswer"])
        print "===================================================="
        trainVec, testVec = preQuestionCaption(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 2030,
                             misc["numHidden"], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
    if Type == 5:
        print "===================================================="
        print "Test on QuestionImageCaption, The K number is %d" % (misc["numAnswer"])
        print "===================================================="
        trainVec, testVec = preQuestionCaptionImg(dp, misc)
        if misc["classifier"] == "SVM":
            out = SVMtrainModel(trainVec, misc["bowAnswerTrain"], testVec, misc)
        else:
            out = trainModel(trainVec, misc["bowAnswerTrain"], testVec, 3030,
                             misc["numHidden"], 300, misc, L1, L2)
        multChoiceWriteJson(out, dp.iterAll("test"), misc)
        openAnswerWriteJson(out, dp.iterAll("test"), misc)
        calAcc(out, misc)
def Prior_baseline(num_hidden=50, K=500, Type=0, isBinary=0, classifier="MLP", CNNfeat="fc7", L2=0.001, C=1):
    """Answer-prior baseline: predict the most frequent training answer
    for the longest matching question prefix.

    Sweeps a frequency threshold `th`, builds a prefix vocabulary of
    questions, maps each prefix to its majority training answer, and
    scores the resulting predictions on the test split.
    """
    # Type, a list indicate which we want to use.
    dataset = "coco"
    misc = {}
    misc["C"] = C
    misc["Type"] = Type
    misc["IsBinary"] = isBinary
    misc["numAnswer"] = K
    misc["numHidden"] = num_hidden
    misc["vali_size"] = 20000
    misc["classifier"] = classifier
    misc["CNNfeat"] = CNNfeat
    misc["L2"] = L2
    dp = getDataProvider(dataset, misc["IsBinary"])
    print "The K number is %d" % (misc["numAnswer"])
    # dp.downloadImage()
    # dp.loadCaption()
    # get the vocabulary for the answers.
    misc["Awordtoix"], misc["Aixtoword"], misc["Avocab"] = preProBuildAnswerVocab(
        dp.iterAnswer("train"), misc["numAnswer"]
    )
    misc["bowAnswerTrain"] = BoWAnswerEncoding(dp.iterAnswer("train"), misc["Awordtoix"])
    misc["bowAnswerTest"] = BoWAnswerEncoding(dp.iterAnswer("test"), misc["Awordtoix"])
    misc["multiAnswerTest"] = BOWMultiAnswerEncoding(dp.iterMultiAnswer("test"), misc["Awordtoix"])
    misc["genCaptionTest"] = BOWMultiAnswerEncoding(dp.iterGenCaption("test"), misc["Awordtoix"])
    misc["answerGroup"] = FindAnswerGroup(dp.iterImgIdQuestion("test"))
    for misc["th"] in range(150, 400, 25):
        print "th value is %d" % misc["th"]
        answer_counts, vocab, ques_depth = preProBuildAnswerVocabTop(dp, misc["th"])
        idx = 0
        # Collect, per question prefix, the list of training answer ids.
        ans_counts = {}
        for sent in dp.iterQuestion("train"):
            string = " ".join(sent[: ques_depth[idx]])
            if string in vocab:
                # misc["numAnswer"] acts as a "missing" sentinel here.
                if ans_counts.get(string, misc["numAnswer"]) == misc["numAnswer"]:
                    ans_counts[string] = [misc["bowAnswerTrain"][idx]]
                else:
                    ans_counts[string] = ans_counts.get(string, misc["numAnswer"]) + [misc["bowAnswerTrain"][idx]]
            idx += 1
        # get the prior of the label
        ans_prior = {}
        for key, value in ans_counts.iteritems():
            tmp = {}
            for idx in value:
                tmp[idx] = tmp.get(idx, 0) + 1
            # Most frequent answer id for this prefix.
            tmp_idx = sorted(tmp, key=tmp.get, reverse=True)[0]
            ans_prior[key] = misc["Aixtoword"].get(tmp_idx)
        # Debug dump: prefixes and counts grouped by prefix depth.
        # NOTE(review): nesting below reconstructed from collapsed source.
        for i in range(6):
            print "Depth %d" % (i + 1)
            for key, value in sorted(ans_counts.iteritems(), key=lambda (k, v): (v, k)):
                if len(key.split(" ")) == i + 1:
                    print "%s: %s " % (key, len(value)),
            print ""
        for i in range(6):
            print "Depth %d" % (i + 1)
            for tmp in vocab:
                if len(tmp.split(" ")) == i + 1:
                    print "%s: %s " % (tmp, ans_prior.get(tmp)),
            print ""
        depth_count = {}
        for tmp in vocab:
            count = len(tmp.split(" "))
            depth_count[count] = depth_count.get(count, 0) + 1
        # Interactive breakpoint left in by the author.
        pdb.set_trace()
        max_depth = max(ques_depth)
        ans = []
        for sent in dp.iterQuestion("test"):
            # iter from big to small
            tmp = ""
            for depth in range(max_depth):
                string = " ".join(sent[: ques_depth[max_depth - depth]])
                if string in vocab:
                    tmp = string
                    break
            if tmp != "":
                idx = misc["Awordtoix"].get(ans_prior.get(string))
                ans.append(idx)
            else:
                # No matching prefix: fall back to answer id 0.
                ans.append(0)
        utils.mlpOPlable(ans, misc["bowAnswerTest"], misc["answerGroup"], misc["numAnswer"])
        openAnswerWriteJson(ans, dp.iterAll("test"), misc)
def main(num_hidden=50, K=150, Type=1, isBinary=0, classifier='MLP', CNNfeat='softmax', L2=0.0005, C=1):
    """Train and evaluate a VQA baseline on COCO.

    Type selects the input features: 0=question, 1=question+image,
    2=caption, 3=image, 4=question+caption, 5=question+image+caption.
    K is the answer-vocabulary size; classifier is 'MLP' or 'SVM'.
    """
    # Type, a list indicate which we want to use.
    dataset = 'coco'
    # All hyper-parameters and derived data are threaded through `misc`.
    misc = {}
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 25000
    misc['classifier'] = classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2
    L1 = 0  # no L1 regularization
    dp = getDataProvider(dataset, misc['IsBinary'])
    print 'The K number is %d' % (misc['numAnswer'])
    # dp.downloadImage()
    # dp.loadCaption()
    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc['Avocab'] = preProBuildAnswerVocab(
        dp.iterAnswer('train'), misc['numAnswer'])
    # Bag-of-words encodings for train/test answers and multiple-choice data.
    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'), misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'), misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'), misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))
    if Type == 0:
        print '===================================================='
        print 'Test on Question, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preQuestion(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            # 1030 = input dimensionality for question-only features.
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1030,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 1:
        print '===================================================='
        print 'Test on QuestionImage, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec, misc = preQuestionImage(dp, misc)
        # Persist the vocabularies so the model can be reused later.
        Vocab_save = {}
        Vocab_save['Awordtoix'] = misc['Awordtoix']
        Vocab_save['Aixtoword'] = misc['Aixtoword']
        Vocab_save['Avocab'] = misc['Avocab']
        Vocab_save['Qwordtoix0'] = misc['Qwordtoix0']
        Vocab_save['Qwordtoix1'] = misc['Qwordtoix1']
        Vocab_save['Qwordtoix2'] = misc['Qwordtoix2']
        Vocab_save['Qwordtoix3'] = misc['Qwordtoix3']
        utils.pickleSave('Vocab', Vocab_save)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 2030,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 2:
        print '===================================================='
        print 'Test on Caption, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preCaption(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1000,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 3:
        print '===================================================='
        print 'Test on Image, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preImage(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 1000,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 4:
        print '===================================================='
        print 'Test on QuestionCaption, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preQuestionCaption(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 2030,
                             misc['numHidden'], 300, misc, L1, L2)
        calAcc(out, misc)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
    if Type == 5:
        print '===================================================='
        print 'Test on QuestionImageCaption, The K number is %d' % (misc['numAnswer'])
        print '===================================================='
        trainVec, testVec = preQuestionCaptionImg(dp, misc)
        if misc['classifier'] == 'SVM':
            out = SVMtrainModel(trainVec, misc['bowAnswerTrain'], testVec, misc)
        else:
            out = trainModel(trainVec, misc['bowAnswerTrain'], testVec, 3030,
                             misc['numHidden'], 300, misc, L1, L2)
        multChoiceWriteJson(out, dp.iterAll('test'), misc)
        openAnswerWriteJson(out, dp.iterAll('test'), misc)
        calAcc(out, misc)
def Prior_baseline(num_hidden=50, K=500, Type=0, isBinary=0, classifier='MLP', CNNfeat='fc7', L2=0.001, C=1):
    """Answer-prior baseline: predict the most frequent training answer
    for the longest matching question prefix.

    Sweeps a frequency threshold `th`, builds a prefix vocabulary of
    questions, maps each prefix to its majority training answer, and
    scores the resulting predictions on the test split.
    """
    # Type, a list indicate which we want to use.
    dataset = 'coco'
    word_count_threshold = 1  # NOTE(review): assigned but unused below
    misc = {}
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 20000
    misc['classifier'] = classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2
    L1 = 0  # NOTE(review): assigned but unused below
    dp = getDataProvider(dataset, misc['IsBinary'])
    print 'The K number is %d' % (misc['numAnswer'])
    # dp.downloadImage()
    # dp.loadCaption()
    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc['Avocab'] = preProBuildAnswerVocab(dp.iterAnswer('train'), misc['numAnswer'])
    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'), misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'), misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'), misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))
    result = {}  # NOTE(review): assigned but unused below
    for misc['th'] in range(150, 400, 25):
        print 'th value is %d' % misc['th']
        answer_counts, vocab, ques_depth = preProBuildAnswerVocabTop(dp, misc['th'])
        idx = 0
        # Collect, per question prefix, the list of training answer ids.
        ans_counts = {}
        for sent in dp.iterQuestion('train'):
            string = ' '.join(sent[:ques_depth[idx]])
            if string in vocab:
                # misc['numAnswer'] acts as a "missing" sentinel here.
                if ans_counts.get(string, misc['numAnswer']) == misc['numAnswer']:
                    ans_counts[string] = [misc['bowAnswerTrain'][idx]]
                else:
                    ans_counts[string] = ans_counts.get(string, misc['numAnswer']) + [misc['bowAnswerTrain'][idx]]
            idx += 1
        # get the prior of the label
        ans_prior = {}
        for key, value in ans_counts.iteritems():
            tmp = {}
            for idx in value:
                tmp[idx] = tmp.get(idx, 0) + 1
            # Most frequent answer id for this prefix.
            tmp_idx = sorted(tmp, key=tmp.get, reverse=True)[0]
            ans_prior[key] = misc['Aixtoword'].get(tmp_idx)
        # Debug dump: prefixes and counts grouped by prefix depth.
        # NOTE(review): nesting below reconstructed from collapsed source.
        for i in range(6):
            print "Depth %d" % (i + 1)
            for key, value in sorted(ans_counts.iteritems(), key=lambda (k, v): (v, k)):
                if len(key.split(' ')) == i + 1:
                    print "%s: %s " % (key, len(value)),
            print ""
        for i in range(6):
            print "Depth %d" % (i + 1)
            for tmp in vocab:
                if len(tmp.split(' ')) == i + 1:
                    print "%s: %s " % (tmp, ans_prior.get(tmp)),
            print ""
        depth_count = {}
        for tmp in vocab:
            count = len(tmp.split(' '))
            depth_count[count] = depth_count.get(count, 0) + 1
        # Interactive breakpoint left in by the author.
        pdb.set_trace()
        max_depth = max(ques_depth)
        ans = []
        for sent in dp.iterQuestion('test'):
            # iter from big to small
            tmp = ''
            for depth in range(max_depth):
                string = ' '.join(sent[:ques_depth[max_depth - depth]])
                if string in vocab:
                    tmp = string
                    break
            if tmp != '':
                idx = misc['Awordtoix'].get(ans_prior.get(string))
                ans.append(idx)
            else:
                # No matching prefix: fall back to answer id 0.
                ans.append(0)
        utils.mlpOPlable(ans, misc['bowAnswerTest'], misc['answerGroup'], misc['numAnswer'])
        openAnswerWriteJson(ans, dp.iterAll('test'), misc)