def calRandomAcc(misc):
    """Score two trivial baselines on the test answers with ``utils.mlpOPlable``.

    An all-zeros prediction vector is scored first, followed by a vector of
    randomly drawn answer indices.

    Args:
        misc: dict providing the keys ``'bowAnswerTest'``, ``'answerGroup'``
            and ``'numAnswer'``; all three are forwarded to
            ``utils.mlpOPlable``.

    Returns:
        None. Whatever ``utils.mlpOPlable`` returns is discarded.
    """
    targets = misc['bowAnswerTest']
    num_samples = len(targets)

    # Baseline 1: predict index 0 for every test sample.
    utils.mlpOPlable(np.zeros(num_samples), targets, misc['answerGroup'], misc['numAnswer'])

    # Baseline 2: random guesses. The upper bound follows misc['numAnswer']
    # rather than a literal 499, so the guesses stay inside the answer
    # vocabulary when K is not 500 (identical draws for the default K=500).
    # NOTE(review): assumes randint has an inclusive upper bound
    # (random.randint) -- confirm against the file's imports.
    top_index = misc['numAnswer'] - 1
    guesses = [randint(0, top_index) for _ in range(num_samples)]
    utils.mlpOPlable(guesses, targets, misc['answerGroup'], misc['numAnswer'])
def Prior_baseline(num_hidden=50, K=500, Type=0, isBinary=0, classifier='MLP',
                   CNNfeat='fc7', L2=0.001, C=1):
    """Evaluate a question-prefix prior baseline on the 'coco' data provider.

    For every threshold in ``range(150, 400, 25)`` the function:

    1. asks ``preProBuildAnswerVocabTop`` for a collection of question
       prefixes (``vocab``) and a per-training-question prefix length
       (``ques_depth``);
    2. groups the encoded training answers by prefix and keeps the most
       frequent answer of each prefix as its prior;
    3. prints per-depth diagnostics for depths 1 to 6;
    4. answers each test question with the prior of its longest prefix found
       in ``vocab`` (index 0 when nothing matches), scores the predictions
       with ``utils.mlpOPlable`` and hands them to ``openAnswerWriteJson``.

    Args:
        num_hidden: stored as ``misc['numHidden']``; not otherwise used here.
        K: answer-vocabulary size, stored as ``misc['numAnswer']`` and passed
            to ``preProBuildAnswerVocab`` and ``utils.mlpOPlable``.
        Type: stored as ``misc['Type']`` (originally described as a list
            indicating which inputs to use); not otherwise used here.
        isBinary: stored as ``misc['IsBinary']`` and passed to
            ``getDataProvider``.
        classifier: stored as ``misc['classifier']``; not otherwise used here.
        CNNfeat: stored as ``misc['CNNfeat']``; not otherwise used here.
        L2: stored as ``misc['L2']``; not otherwise used here.
        C: stored as ``misc['C']``; not otherwise used here.

    Returns:
        None.
    """
    dataset = 'coco'
    misc = {
        'C': C,
        'Type': Type,
        'IsBinary': isBinary,
        'numAnswer': K,
        'numHidden': num_hidden,
        'vali_size': 20000,
        'classifier': classifier,
        'CNNfeat': CNNfeat,
        'L2': L2,
    }

    dp = getDataProvider(dataset, misc['IsBinary'])
    print('The K number is %d' % (misc['numAnswer']))

    # Build the answer vocabulary from the training answers.
    misc['Awordtoix'], misc['Aixtoword'], misc['Avocab'] = preProBuildAnswerVocab(
        dp.iterAnswer('train'), misc['numAnswer'])

    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'), misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'), misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(
        dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(
        dp.iterGenCaption('test'), misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))

    # The loop variable lives in misc so the current threshold travels with
    # misc into openAnswerWriteJson.
    for misc['th'] in range(150, 400, 25):
        print('th value is %d' % misc['th'])
        # The first return value (answer counts) is not needed here.
        _, vocab, ques_depth = preProBuildAnswerVocabTop(dp, misc['th'])

        # Collect the encoded training answers of every prefix in vocab.
        ans_counts = {}
        for idx, sent in enumerate(dp.iterQuestion('train')):
            prefix = ' '.join(sent[:ques_depth[idx]])
            if prefix in vocab:
                ans_counts.setdefault(prefix, []).append(misc['bowAnswerTrain'][idx])

        # Prior of a prefix = its most frequent training answer, kept as a word.
        ans_prior = {}
        for prefix, answers in ans_counts.items():
            freq = {}
            for answer in answers:
                freq[answer] = freq.get(answer, 0) + 1
            # max() returns the first maximal key, the same tie-break as
            # taking element 0 of a stable descending sort.
            ans_prior[prefix] = misc['Aixtoword'].get(max(freq, key=freq.get))

        # Diagnostics: number of training answers per prefix, grouped by depth.
        # The ordering is depth-independent, so sort once.
        ordered = sorted(ans_counts.items(), key=lambda item: (item[1], item[0]))
        for depth in range(1, 7):
            print("Depth %d" % depth)
            cells = ["%s: %s " % (prefix, len(answers))
                     for prefix, answers in ordered
                     if len(prefix.split(' ')) == depth]
            # Joining with a trailing empty cell yields the same bytes as the
            # former chain of comma-terminated print statements.
            print(' '.join(cells + ['']))

        # Diagnostics: prior answer per prefix, grouped by depth.
        for depth in range(1, 7):
            print("Depth %d" % depth)
            cells = ["%s: %s " % (prefix, ans_prior.get(prefix))
                     for prefix in vocab
                     if len(prefix.split(' ')) == depth]
            print(' '.join(cells + ['']))

        # NOTE: a leftover pdb.set_trace() used to stop every threshold
        # iteration at this point; it has been dropped so the sweep runs
        # unattended.

        max_depth = max(ques_depth)
        ans = []
        for sent in dp.iterQuestion('test'):
            # Try prefixes from the longest to the shortest. ques_depth is
            # indexed by training-question position, so the slice must use
            # the depth itself rather than ques_depth[...].
            matched = ''
            for depth in range(max_depth, 0, -1):
                prefix = ' '.join(sent[:depth])
                if prefix in vocab:
                    matched = prefix
                    break
            idx = misc['Awordtoix'].get(ans_prior.get(matched)) if matched != '' else None
            # Use index 0 both when no prefix matched and when the prior
            # cannot be mapped back to an answer index, so ans never holds None.
            ans.append(0 if idx is None else idx)

        utils.mlpOPlable(ans, misc['bowAnswerTest'], misc['answerGroup'], misc['numAnswer'])
        openAnswerWriteJson(ans, dp.iterAll('test'), misc)
def Prior_baseline(
    num_hidden=50, K=500, Type=0, isBinary=0, classifier="MLP", CNNfeat="fc7", L2=0.001, C=1
):
    """Run the question-prefix prior baseline over a sweep of thresholds.

    Each threshold in ``range(150, 400, 25)`` is handed to
    ``preProBuildAnswerVocabTop``, which returns a collection of question
    prefixes (``vocab``) and a prefix length per training question
    (``ques_depth``). The most frequent encoded training answer of every
    prefix becomes that prefix's prior. Test questions are answered with the
    prior of their longest prefix present in ``vocab`` (index 0 otherwise);
    the predictions go to ``utils.mlpOPlable`` and ``openAnswerWriteJson``.
    Per-depth diagnostics for depths 1 to 6 are printed along the way.

    Args:
        num_hidden: stored as ``misc["numHidden"]``; not otherwise used here.
        K: answer-vocabulary size, stored as ``misc["numAnswer"]`` and passed
            to ``preProBuildAnswerVocab`` and ``utils.mlpOPlable``.
        Type: stored as ``misc["Type"]`` (originally described as a list
            indicating which inputs to use); not otherwise used here.
        isBinary: stored as ``misc["IsBinary"]`` and passed to
            ``getDataProvider``.
        classifier: stored as ``misc["classifier"]``; not otherwise used here.
        CNNfeat: stored as ``misc["CNNfeat"]``; not otherwise used here.
        L2: stored as ``misc["L2"]``; not otherwise used here.
        C: stored as ``misc["C"]``; not otherwise used here.

    Returns:
        None.
    """
    dataset = "coco"
    misc = {}
    misc["C"] = C
    misc["Type"] = Type
    misc["IsBinary"] = isBinary
    misc["numAnswer"] = K
    misc["numHidden"] = num_hidden
    misc["vali_size"] = 20000
    misc["classifier"] = classifier
    misc["CNNfeat"] = CNNfeat
    misc["L2"] = L2

    dp = getDataProvider(dataset, misc["IsBinary"])
    print("The K number is %d" % (misc["numAnswer"]))

    # Answer vocabulary comes from the training answers only.
    misc["Awordtoix"], misc["Aixtoword"], misc["Avocab"] = preProBuildAnswerVocab(
        dp.iterAnswer("train"), misc["numAnswer"]
    )
    word_to_ix = misc["Awordtoix"]
    misc["bowAnswerTrain"] = BoWAnswerEncoding(dp.iterAnswer("train"), word_to_ix)
    misc["bowAnswerTest"] = BoWAnswerEncoding(dp.iterAnswer("test"), word_to_ix)
    misc["multiAnswerTest"] = BOWMultiAnswerEncoding(dp.iterMultiAnswer("test"), word_to_ix)
    misc["genCaptionTest"] = BOWMultiAnswerEncoding(dp.iterGenCaption("test"), word_to_ix)
    misc["answerGroup"] = FindAnswerGroup(dp.iterImgIdQuestion("test"))

    # Iterating directly into misc["th"] keeps the active threshold inside
    # misc, which is later passed to openAnswerWriteJson.
    for misc["th"] in range(150, 400, 25):
        print("th value is %d" % misc["th"])
        # Only the prefixes and the per-question depths are used below.
        _, vocab, ques_depth = preProBuildAnswerVocabTop(dp, misc["th"])

        # prefix -> list of encoded training answers seen with that prefix.
        ans_counts = {}
        train_answers = misc["bowAnswerTrain"]
        for idx, sent in enumerate(dp.iterQuestion("train")):
            prefix = " ".join(sent[: ques_depth[idx]])
            if prefix in vocab:
                ans_counts.setdefault(prefix, []).append(train_answers[idx])

        # prefix -> most frequent answer, stored as a word via Aixtoword.
        ans_prior = {}
        for prefix, answers in ans_counts.items():
            tally = {}
            for answer in answers:
                tally[answer] = tally.get(answer, 0) + 1
            # First maximal key wins, matching a stable descending sort.
            best = max(tally, key=tally.get)
            ans_prior[prefix] = misc["Aixtoword"].get(best)

        # Diagnostic 1: how many training answers each prefix collected.
        # The sort order does not depend on the depth, so compute it once.
        ordered = sorted(ans_counts.items(), key=lambda item: (item[1], item[0]))
        for depth in range(1, 7):
            print("Depth %d" % depth)
            cells = [
                "%s: %s " % (prefix, len(answers))
                for prefix, answers in ordered
                if len(prefix.split(" ")) == depth
            ]
            # The trailing empty cell reproduces the exact bytes the former
            # comma-terminated print statements emitted.
            print(" ".join(cells + [""]))

        # Diagnostic 2: the prior answer chosen for each prefix.
        for depth in range(1, 7):
            print("Depth %d" % depth)
            cells = [
                "%s: %s " % (prefix, ans_prior.get(prefix))
                for prefix in vocab
                if len(prefix.split(" ")) == depth
            ]
            print(" ".join(cells + [""]))

        # NOTE: the pdb.set_trace() breakpoint that used to sit here blocked
        # every iteration of the sweep and has been removed.

        max_depth = max(ques_depth)
        ans = []
        for sent in dp.iterQuestion("test"):
            # Longest prefix first. The slice length is the depth itself:
            # ques_depth holds one entry per training question and must not
            # be indexed by a depth value.
            matched = ""
            for depth in range(max_depth, 0, -1):
                prefix = " ".join(sent[:depth])
                if prefix in vocab:
                    matched = prefix
                    break
            if matched != "":
                idx = word_to_ix.get(ans_prior.get(matched))
            else:
                idx = None
            # Default to index 0 when nothing matched or the prior has no
            # index, so the prediction list never contains None.
            ans.append(0 if idx is None else idx)

        utils.mlpOPlable(ans, misc["bowAnswerTest"], misc["answerGroup"], misc["numAnswer"])
        openAnswerWriteJson(ans, dp.iterAll("test"), misc)
def calRandomAcc(misc):
    """Report scores of an all-zeros and a random predictor on the test set.

    Both prediction vectors are passed to ``utils.mlpOPlable`` together with
    ``misc["bowAnswerTest"]``, ``misc["answerGroup"]`` and
    ``misc["numAnswer"]``.

    Args:
        misc: dict that must contain ``"bowAnswerTest"``, ``"answerGroup"``
            and ``"numAnswer"``.

    Returns:
        None. Whatever ``utils.mlpOPlable`` returns is discarded.
    """
    expected = misc["bowAnswerTest"]
    groups = misc["answerGroup"]
    num_answer = misc["numAnswer"]
    count = len(expected)

    # Constant predictor: index 0 for every sample.
    utils.mlpOPlable(np.zeros(count), expected, groups, num_answer)

    # Random predictor. The range is tied to num_answer instead of a literal
    # 499, which only matched the default of 500 answers.
    # NOTE(review): assumes randint includes its upper bound (random.randint)
    # -- confirm against the file's imports.
    randintx = [randint(0, num_answer - 1) for _ in range(count)]
    utils.mlpOPlable(randintx, expected, groups, num_answer)
def calRandomAcc(misc):
    """Evaluate two chance-level predictors through ``utils.mlpOPlable``.

    The first call scores a zero vector, the second a list of random answer
    indices; each is compared against ``misc['bowAnswerTest']``.

    Args:
        misc: dict with the entries ``'bowAnswerTest'``, ``'answerGroup'``
            and ``'numAnswer'``, all of which are forwarded to
            ``utils.mlpOPlable``.

    Returns:
        None. Whatever ``utils.mlpOPlable`` returns is discarded.
    """
    n_test = len(misc['bowAnswerTest'])

    # Predictor that always outputs index 0.
    utils.mlpOPlable(np.zeros(n_test), misc['bowAnswerTest'],
                     misc['answerGroup'], misc['numAnswer'])

    # Predictor that guesses at random. The bound is derived from
    # misc['numAnswer']; the previous literal 499 was only valid for
    # numAnswer == 500.
    # NOTE(review): assumes randint is inclusive of its upper bound
    # (random.randint) -- confirm against the file's imports.
    last_answer = misc['numAnswer'] - 1
    randintx = [randint(0, last_answer) for _ in range(n_test)]
    utils.mlpOPlable(randintx, misc['bowAnswerTest'],
                     misc['answerGroup'], misc['numAnswer'])
def Prior_baseline(num_hidden=50, K=500, Type=0, isBinary=0, classifier='MLP',
                   CNNfeat='fc7', L2=0.001, C=1):
    """Score a "most frequent answer per question prefix" baseline.

    The thresholds ``range(150, 400, 25)`` are swept. For each one,
    ``preProBuildAnswerVocabTop`` supplies the question prefixes (``vocab``)
    and one prefix length per training question (``ques_depth``). Training
    answers are grouped by prefix, the most frequent answer of each group
    becomes the prior, diagnostics for depths 1 to 6 are printed, and every
    test question is answered with the prior of its longest prefix contained
    in ``vocab`` (index 0 when there is none). Predictions are scored with
    ``utils.mlpOPlable`` and passed to ``openAnswerWriteJson``.

    Args:
        num_hidden: stored as ``misc['numHidden']``; not otherwise used here.
        K: answer-vocabulary size, stored as ``misc['numAnswer']`` and passed
            to ``preProBuildAnswerVocab`` and ``utils.mlpOPlable``.
        Type: stored as ``misc['Type']`` (originally described as a list
            indicating which inputs to use); not otherwise used here.
        isBinary: stored as ``misc['IsBinary']`` and passed to
            ``getDataProvider``.
        classifier: stored as ``misc['classifier']``; not otherwise used here.
        CNNfeat: stored as ``misc['CNNfeat']``; not otherwise used here.
        L2: stored as ``misc['L2']``; not otherwise used here.
        C: stored as ``misc['C']``; not otherwise used here.

    Returns:
        None.
    """
    dataset = 'coco'
    misc = dict(
        C=C,
        Type=Type,
        IsBinary=isBinary,
        numAnswer=K,
        numHidden=num_hidden,
        vali_size=20000,
        classifier=classifier,
        CNNfeat=CNNfeat,
        L2=L2,
    )

    dp = getDataProvider(dataset, misc['IsBinary'])
    print('The K number is %d' % (misc['numAnswer']))

    # Get the vocabulary for the answers (training split only).
    misc['Awordtoix'], misc['Aixtoword'], misc['Avocab'] = preProBuildAnswerVocab(
        dp.iterAnswer('train'), misc['numAnswer'])

    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'), misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'), misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(
        dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(
        dp.iterGenCaption('test'), misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))

    def _depth_of(prefix):
        # Depth of a prefix = number of space-separated tokens.
        return len(prefix.split(' '))

    # misc['th'] is the loop target so that misc carries the active threshold
    # when it reaches openAnswerWriteJson.
    for misc['th'] in range(150, 400, 25):
        print('th value is %d' % misc['th'])
        # The answer counts returned first are not used by this baseline.
        _, vocab, ques_depth = preProBuildAnswerVocabTop(dp, misc['th'])

        # Gather the encoded training answers under their question prefix.
        ans_counts = {}
        for idx, sent in enumerate(dp.iterQuestion('train')):
            prefix = ' '.join(sent[:ques_depth[idx]])
            if prefix in vocab:
                ans_counts.setdefault(prefix, []).append(misc['bowAnswerTrain'][idx])

        # Get the prior of the label: the most frequent answer per prefix.
        ans_prior = {}
        for prefix, answers in ans_counts.items():
            votes = {}
            for answer in answers:
                votes[answer] = votes.get(answer, 0) + 1
            # max() keeps the first maximal key, the same winner a stable
            # descending sort would put at position 0.
            ans_prior[prefix] = misc['Aixtoword'].get(max(votes, key=votes.get))

        # Print, per depth, how many training answers each prefix received.
        # The ordering is the same for every depth, so it is computed once.
        ordered = sorted(ans_counts.items(), key=lambda item: (item[1], item[0]))
        for depth in range(1, 7):
            print("Depth %d" % depth)
            cells = ["%s: %s " % (prefix, len(answers))
                     for prefix, answers in ordered if _depth_of(prefix) == depth]
            # Appending an empty cell reproduces the bytes written by the
            # former comma-terminated print statements.
            print(' '.join(cells + ['']))

        # Print, per depth, the prior answer selected for each prefix.
        for depth in range(1, 7):
            print("Depth %d" % depth)
            cells = ["%s: %s " % (prefix, ans_prior.get(prefix))
                     for prefix in vocab if _depth_of(prefix) == depth]
            print(' '.join(cells + ['']))

        # NOTE: a stray pdb.set_trace() previously interrupted the sweep here
        # on every threshold; it has been removed.

        max_depth = max(ques_depth)
        ans = []
        for sent in dp.iterQuestion('test'):
            # Iterate from the deepest prefix to the shallowest. The depth is
            # used directly as the slice length; ques_depth is indexed by
            # training-question position and is not a depth table.
            matched = ''
            for depth in range(max_depth, 0, -1):
                prefix = ' '.join(sent[:depth])
                if prefix in vocab:
                    matched = prefix
                    break
            idx = None
            if matched != '':
                idx = misc['Awordtoix'].get(ans_prior.get(matched))
            # Index 0 stands in for "no match" and for priors that cannot be
            # mapped back to an index, keeping None out of the predictions.
            ans.append(0 if idx is None else idx)

        utils.mlpOPlable(ans, misc['bowAnswerTest'], misc['answerGroup'], misc['numAnswer'])
        openAnswerWriteJson(ans, dp.iterAll('test'), misc)