예제 #1
0
def calRandomAcc(misc):
    """Run ``utils.mlpOPlable`` on two trivial predictions for the test set.

    The first prediction is all zeros (one entry per test answer); the
    second is one ``randint`` draw per test answer.  Each prediction is
    passed to ``utils.mlpOPlable`` together with the test answers, the
    answer grouping and the number of answers.

    Args:
        misc: dict providing ``'bowAnswerTest'``, ``'answerGroup'`` and
            ``'numAnswer'``.
    """
    num_test = len(misc['bowAnswerTest'])
    num_answer = misc['numAnswer']

    # Constant baseline: every prediction is 0.
    utils.mlpOPlable(np.zeros(num_test), misc['bowAnswerTest'],
                     misc['answerGroup'], num_answer)

    # Random baseline.  The upper bound follows the configured answer count
    # rather than a fixed 499; for the default of 500 answers this is the
    # exact same call as before.
    # NOTE(review): presumably labels span 0 .. numAnswer - 1 -- confirm.
    randintx = [randint(0, num_answer - 1) for _ in range(num_test)]
    utils.mlpOPlable(randintx, misc['bowAnswerTest'], misc['answerGroup'],
                     num_answer)
예제 #2
0
def Prior_baseline(num_hidden=50,
                   K=500,
                   Type=0,
                   isBinary=0,
                   classifier='MLP',
                   CNNfeat='fc7',
                   L2=0.001,
                   C=1):
    # Type, a list indicate which we want to use.
    dataset = 'coco'
    misc = {}
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 20000
    misc['classifier'] = classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2

    dp = getDataProvider(dataset, misc['IsBinary'])
    print 'The K number is %d' % (misc['numAnswer'])
    # dp.downloadImage()
    # dp.loadCaption()

    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc[
        'Avocab'] = preProBuildAnswerVocab(dp.iterAnswer('train'),
                                           misc['numAnswer'])

    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'),
                                               misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'),
                                              misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(
        dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'),
                                                    misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))

    for misc['th'] in range(150, 400, 25):

        print 'th value is %d' % misc['th']
        answer_counts, vocab, ques_depth = preProBuildAnswerVocabTop(
            dp, misc['th'])

        idx = 0
        ans_counts = {}

        for sent in dp.iterQuestion('train'):
            string = ' '.join(sent[:ques_depth[idx]])
            if string in vocab:
                if ans_counts.get(string,
                                  misc['numAnswer']) == misc['numAnswer']:
                    ans_counts[string] = [misc['bowAnswerTrain'][idx]]
                else:
                    ans_counts[string] = ans_counts.get(
                        string,
                        misc['numAnswer']) + [misc['bowAnswerTrain'][idx]]
            idx += 1

        # get the prior of the label
        ans_prior = {}
        for key, value in ans_counts.iteritems():
            tmp = {}
            for idx in value:
                tmp[idx] = tmp.get(idx, 0) + 1
            tmp_idx = sorted(tmp, key=tmp.get, reverse=True)[0]

            ans_prior[key] = misc['Aixtoword'].get(tmp_idx)

        for i in range(6):
            print "Depth %d" % (i + 1)
            for key, value in sorted(ans_counts.iteritems(),
                                     key=lambda (k, v): (v, k)):
                if len(key.split(' ')) == i + 1:
                    print "%s: %s   " % (key, len(value)),
            print ""

        for i in range(6):
            print "Depth %d" % (i + 1)
            for tmp in vocab:
                if len(tmp.split(' ')) == i + 1:
                    print "%s: %s   " % (tmp, ans_prior.get(tmp)),
            print ""

        depth_count = {}
        for tmp in vocab:
            count = len(tmp.split(' '))
            depth_count[count] = depth_count.get(count, 0) + 1

        pdb.set_trace()
        max_depth = max(ques_depth)

        ans = []
        for sent in dp.iterQuestion('test'):
            # iter from big to small
            tmp = ''
            for depth in range(max_depth):
                string = ' '.join(sent[:ques_depth[max_depth - depth]])

                if string in vocab:
                    tmp = string
                    break

            if tmp != '':
                idx = misc['Awordtoix'].get(ans_prior.get(string))
                ans.append(idx)
            else:
                ans.append(0)

        utils.mlpOPlable(ans, misc['bowAnswerTest'], misc['answerGroup'],
                         misc['numAnswer'])

        openAnswerWriteJson(ans, dp.iterAll('test'), misc)
예제 #3
0
파일: driver.py 프로젝트: xetrocoen/CloudCV
def Prior_baseline(num_hidden=50, K=500, Type=0, isBinary=0, classifier="MLP", CNNfeat="fc7", L2=0.001, C=1):
    # Type, a list indicate which we want to use.
    dataset = "coco"
    misc = {}
    misc["C"] = C
    misc["Type"] = Type
    misc["IsBinary"] = isBinary
    misc["numAnswer"] = K
    misc["numHidden"] = num_hidden
    misc["vali_size"] = 20000
    misc["classifier"] = classifier
    misc["CNNfeat"] = CNNfeat
    misc["L2"] = L2

    dp = getDataProvider(dataset, misc["IsBinary"])
    print "The K number is %d" % (misc["numAnswer"])
    # dp.downloadImage()
    # dp.loadCaption()

    # get the vocabulary for the answers.
    misc["Awordtoix"], misc["Aixtoword"], misc["Avocab"] = preProBuildAnswerVocab(
        dp.iterAnswer("train"), misc["numAnswer"]
    )

    misc["bowAnswerTrain"] = BoWAnswerEncoding(dp.iterAnswer("train"), misc["Awordtoix"])
    misc["bowAnswerTest"] = BoWAnswerEncoding(dp.iterAnswer("test"), misc["Awordtoix"])
    misc["multiAnswerTest"] = BOWMultiAnswerEncoding(dp.iterMultiAnswer("test"), misc["Awordtoix"])
    misc["genCaptionTest"] = BOWMultiAnswerEncoding(dp.iterGenCaption("test"), misc["Awordtoix"])
    misc["answerGroup"] = FindAnswerGroup(dp.iterImgIdQuestion("test"))

    for misc["th"] in range(150, 400, 25):

        print "th value is %d" % misc["th"]
        answer_counts, vocab, ques_depth = preProBuildAnswerVocabTop(dp, misc["th"])

        idx = 0
        ans_counts = {}

        for sent in dp.iterQuestion("train"):
            string = " ".join(sent[: ques_depth[idx]])
            if string in vocab:
                if ans_counts.get(string, misc["numAnswer"]) == misc["numAnswer"]:
                    ans_counts[string] = [misc["bowAnswerTrain"][idx]]
                else:
                    ans_counts[string] = ans_counts.get(string, misc["numAnswer"]) + [misc["bowAnswerTrain"][idx]]
            idx += 1

        # get the prior of the label
        ans_prior = {}
        for key, value in ans_counts.iteritems():
            tmp = {}
            for idx in value:
                tmp[idx] = tmp.get(idx, 0) + 1
            tmp_idx = sorted(tmp, key=tmp.get, reverse=True)[0]

            ans_prior[key] = misc["Aixtoword"].get(tmp_idx)

        for i in range(6):
            print "Depth %d" % (i + 1)
            for key, value in sorted(ans_counts.iteritems(), key=lambda (k, v): (v, k)):
                if len(key.split(" ")) == i + 1:
                    print "%s: %s   " % (key, len(value)),
            print ""

        for i in range(6):
            print "Depth %d" % (i + 1)
            for tmp in vocab:
                if len(tmp.split(" ")) == i + 1:
                    print "%s: %s   " % (tmp, ans_prior.get(tmp)),
            print ""

        depth_count = {}
        for tmp in vocab:
            count = len(tmp.split(" "))
            depth_count[count] = depth_count.get(count, 0) + 1

        pdb.set_trace()
        max_depth = max(ques_depth)

        ans = []
        for sent in dp.iterQuestion("test"):
            # iter from big to small
            tmp = ""
            for depth in range(max_depth):
                string = " ".join(sent[: ques_depth[max_depth - depth]])

                if string in vocab:
                    tmp = string
                    break

            if tmp != "":
                idx = misc["Awordtoix"].get(ans_prior.get(string))
                ans.append(idx)
            else:
                ans.append(0)

        utils.mlpOPlable(ans, misc["bowAnswerTest"], misc["answerGroup"], misc["numAnswer"])

        openAnswerWriteJson(ans, dp.iterAll("test"), misc)
예제 #4
0
파일: driver.py 프로젝트: xetrocoen/CloudCV
def calRandomAcc(misc):
    """Feed an all-zeros and a random prediction to ``utils.mlpOPlable``.

    Both predictions have one entry per element of
    ``misc["bowAnswerTest"]`` and are passed along with the test answers,
    ``misc["answerGroup"]`` and ``misc["numAnswer"]``.

    Args:
        misc: dict with the keys ``"bowAnswerTest"``, ``"answerGroup"`` and
            ``"numAnswer"``.
    """
    test_answers = misc["bowAnswerTest"]
    num_answer = misc["numAnswer"]

    # All-zeros prediction.
    utils.mlpOPlable(np.zeros(len(test_answers)), test_answers, misc["answerGroup"], num_answer)

    # Random prediction.  The bound is tied to numAnswer instead of the
    # literal 499, which is the same value for the default of 500 answers.
    # NOTE(review): presumably labels span 0 .. numAnswer - 1 -- confirm.
    randintx = [randint(0, num_answer - 1) for _ in range(len(test_answers))]
    utils.mlpOPlable(randintx, test_answers, misc["answerGroup"], num_answer)
예제 #5
0
파일: driver.py 프로젝트: anant-dev/CloudCV
def calRandomAcc(misc):
    """Pass two trivial predictions for the test set to ``utils.mlpOPlable``.

    One prediction is a zero vector, the other holds one ``randint`` draw
    per test answer.  Each call also receives the test answers, the answer
    grouping and the number of answers taken from ``misc``.

    Args:
        misc: dict with ``'bowAnswerTest'``, ``'answerGroup'`` and
            ``'numAnswer'`` entries.
    """
    n_test = len(misc['bowAnswerTest'])

    # Zero-vector prediction.
    zeros = np.zeros(n_test)
    utils.mlpOPlable(zeros, misc['bowAnswerTest'], misc['answerGroup'], misc['numAnswer'])

    # Random prediction; the upper bound is derived from numAnswer rather
    # than fixed at 499 (identical for the default of 500 answers).
    # NOTE(review): presumably labels span 0 .. numAnswer - 1 -- confirm.
    upper = misc['numAnswer'] - 1
    randintx = [randint(0, upper) for _ in range(n_test)]
    utils.mlpOPlable(randintx, misc['bowAnswerTest'], misc['answerGroup'], misc['numAnswer'])
예제 #6
0
def Prior_baseline(num_hidden=50, K = 500, Type = 0, isBinary=0, classifier = 'MLP', CNNfeat = 'fc7', L2 = 0.001, C = 1):
    # Type, a list indicate which we want to use. 
    dataset = 'coco'
    word_count_threshold = 1
    misc= {}  
    misc['C'] = C
    misc['Type'] = Type
    misc['IsBinary'] = isBinary
    misc['numAnswer'] = K
    misc['numHidden'] = num_hidden
    misc['vali_size'] = 20000
    misc['classifier'] =classifier
    misc['CNNfeat'] = CNNfeat
    misc['L2'] = L2 
    L1 = 0
    
    dp = getDataProvider(dataset, misc['IsBinary'])  
    print 'The K number is %d' %(misc['numAnswer'])
    #dp.downloadImage()
    #dp.loadCaption()

    # get the vocabulary for the answers.
    misc['Awordtoix'], misc['Aixtoword'], misc['Avocab'] = preProBuildAnswerVocab(dp.iterAnswer('train'),misc['numAnswer'])
    
    misc['bowAnswerTrain'] = BoWAnswerEncoding(dp.iterAnswer('train'), misc['Awordtoix'])
    misc['bowAnswerTest'] = BoWAnswerEncoding(dp.iterAnswer('test'), misc['Awordtoix'])
    misc['multiAnswerTest'] = BOWMultiAnswerEncoding(dp.iterMultiAnswer('test'), misc['Awordtoix'])
    misc['genCaptionTest'] = BOWMultiAnswerEncoding(dp.iterGenCaption('test'), misc['Awordtoix'])
    misc['answerGroup'] = FindAnswerGroup(dp.iterImgIdQuestion('test'))
    result = {}
    
    for misc['th'] in range(150, 400, 25):

        print 'th value is %d' %misc['th']
        answer_counts, vocab, ques_depth = preProBuildAnswerVocabTop(dp, misc['th'])

        idx = 0
        ans_counts = {}

        for sent in dp.iterQuestion('train'):
            string = ' '.join(sent[:ques_depth[idx]])
            if string in vocab:
                if ans_counts.get(string, misc['numAnswer']) == misc['numAnswer']:
                    ans_counts[string] = [misc['bowAnswerTrain'][idx]]
                else:
                    ans_counts[string] = ans_counts.get(string, misc['numAnswer']) + [misc['bowAnswerTrain'][idx]]
            idx += 1

        # get the prior of the label
        ans_prior = {}
        for key, value in ans_counts.iteritems():
            tmp = {}
            for idx in value:
                tmp[idx] = tmp.get(idx, 0) + 1
            tmp_idx = sorted(tmp, key=tmp.get, reverse=True)[0]

            ans_prior[key] = misc['Aixtoword'].get(tmp_idx)

        for i in range(6):
            print "Depth %d" %(i+1)
            for key, value in sorted(ans_counts.iteritems(), key=lambda (k,v): (v,k)):
                if len(key.split(' '))==i+1:
                    print "%s: %s   " % (key, len(value)),
            print ""
        
        for i in range(6):
            print "Depth %d" %(i+1)
            for tmp in vocab:
                if len(tmp.split(' '))==i+1:
                    print "%s: %s   " % (tmp, ans_prior.get(tmp)), 
            print ""

        depth_count = {}
        for tmp in vocab:
            count = len(tmp.split(' '))
            depth_count[count] = depth_count.get(count, 0) + 1

        pdb.set_trace()
        max_depth = max(ques_depth)

        ans = []
        for sent in dp.iterQuestion('test'):
            # iter from big to small
            tmp = ''
            for depth in range(max_depth):
                string = ' '.join(sent[:ques_depth[max_depth - depth]])

                if string in vocab:
                    tmp = string
                    break


            if tmp != '':
                idx = misc['Awordtoix'].get(ans_prior.get(string))
                ans.append(idx)
            else:
                ans.append(0)
        
        utils.mlpOPlable(ans, misc['bowAnswerTest'], misc['answerGroup'], misc['numAnswer'])

        openAnswerWriteJson(ans, dp.iterAll('test'), misc)