Exemplo n.º 1
0
def process(options, collection):
    """Rank each query's images by their ``calParzen`` score and evaluate DCG@25.

    For every query id read from
    ``<rootpath>/<collection>/Annotations/qid.text.txt`` the annotated images
    are scored with ``calParzen``, sorted by descending score, and the
    resulting label order is handed to the ``DCG@25`` scorer.  Rankings are
    flushed with ``writeRankingResult`` every 20 queries, the per-query DCG
    values are written once at the end, and the ranking result path is
    appended to ``result/individual_result_pathes.txt``.

    Args:
        options: object exposing the attributes ``rootpath``, ``overwrite``,
            ``feature``, ``method`` and ``sigma``.
        collection: name of the collection directory under ``rootpath``.

    Note:
        Calls ``sys.exit(0)`` when ``checkToSkip`` returns a true value for
        either result path.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    feature = options.feature
    method = options.method
    sigma = options.sigma

    # result paths
    ranking_result_path = os.path.join(rootpath, collection, 'SimilarityIndex',
                                       collection, 'MetaData', method, feature)
    DCG_result_path = os.path.join(rootpath, collection, 'DCG', method,
                                   feature)
    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input: queries
    qid_query_file = os.path.join(rootpath, collection, 'Annotations',
                                  'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)
    qid2query = dict(zip(qid_list, query_list))

    # input: image features
    img_feat_path = os.path.join(rootpath, collection, 'FeatureData', feature)
    img_feats = BigFile(img_feat_path)

    # the model used to calculate DCG@25
    scorer = getScorer("DCG@25")

    done = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for qid in qid_list:
        iid_list, label_list = readAnnotationsFrom(
            collection, 'concepts%s.txt' % collection, qid, False, rootpath)

        # Only the feature matrix is needed; the id list returned alongside
        # it is not used here.
        _, test_X = img_feats.read(iid_list)

        # NOTE(review): every image's feature is fetched again one at a time
        # although test_X was just read for the same ids; a batched variant
        # (calParzen_fast) was sketched here earlier -- confirm equivalence
        # before switching.
        parzen_list = [
            calParzen(img_feats.read_one(imidx), test_X, sigma)
            for imidx in iid_list
        ]

        # highest score first
        ranked = sorted(zip(iid_list, label_list, parzen_list),
                        key=lambda v: v[2],
                        reverse=True)
        qid2iid_label_score[qid] = ranked

        # calculate DCG@25 from the labels in ranked order
        qid2dcg[qid] = scorer.score([entry[1] for entry in ranked])
        printMessage("Done", qid, qid2query[qid])

        done += 1
        if done % 20 == 0:
            # flush the rankings gathered so far and start a new batch
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeDCGResult(DCG_result_path, qid2dcg)
    writeRankingResult(ranking_result_path, qid2iid_label_score)

    # Guard the mean: with no queries the division would raise and the result
    # path below would never be recorded.
    mean_dcg = (1.0 * sum(qid2dcg.values()) / len(qid2dcg)) if qid2dcg else 0.0
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("average DCG@25: %f" % mean_dcg)

    result_path_file = "result/individual_result_pathes.txt"
    if not os.path.exists(result_path_file):
        makedirsforfile(result_path_file)
    # Mode 'a' creates the file when it is missing; 'with' closes the handle
    # even if the write fails.
    with open(result_path_file, 'a') as fout:
        fout.write(ranking_result_path + '\n')
Exemplo n.º 2
0
def process(options, collection):
    """Score and rank the annotated images of every query, then report DCG@25.

    Query ids come from ``<rootpath>/<collection>/Annotations/qid.text.txt``.
    For each query the images returned by ``readAnnotationsFrom`` are scored
    with ``calParzen`` and sorted by descending score; the ranked labels are
    evaluated with the ``DCG@25`` scorer.  Rankings are written in batches of
    20 queries, DCG values once at the end, and finally the ranking result
    path is appended to ``result/individual_result_pathes.txt``.

    Args:
        options: object exposing the attributes ``rootpath``, ``overwrite``,
            ``feature``, ``method`` and ``sigma``.
        collection: name of the collection directory under ``rootpath``.

    Note:
        Calls ``sys.exit(0)`` when ``checkToSkip`` returns a true value for
        either result path.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    feature = options.feature
    method = options.method
    sigma = options.sigma

    # result paths
    ranking_result_path = os.path.join(
        rootpath, collection, 'SimilarityIndex', collection, 'MetaData',
        method, feature)
    DCG_result_path = os.path.join(rootpath, collection, 'DCG', method, feature)
    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input: queries
    qid_query_file = os.path.join(
        rootpath, collection, 'Annotations', 'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)
    qid2query = dict(zip(qid_list, query_list))

    # input: image features
    img_feat_path = os.path.join(rootpath, collection, 'FeatureData', feature)
    img_feats = BigFile(img_feat_path)

    # the model used to calculate DCG@25
    scorer = getScorer("DCG@25")

    done = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for qid in qid_list:
        iid_list, label_list = readAnnotationsFrom(
            collection, 'concepts%s.txt' % collection, qid, False, rootpath)

        # The id list returned with the feature matrix is not needed here.
        _, test_X = img_feats.read(iid_list)

        # NOTE(review): each image's feature is read again individually even
        # though test_X was just loaded for the same ids -- possible
        # redundant I/O, confirm against BigFile before changing.
        parzen_list = []
        for imidx in iid_list:
            single_feat = img_feats.read_one(imidx)
            parzen_list.append(calParzen(single_feat, test_X, sigma))

        # highest score first
        ranked = sorted(zip(iid_list, label_list, parzen_list),
                        key=lambda v: v[2], reverse=True)
        qid2iid_label_score[qid] = ranked

        # calculate DCG@25 from the labels in ranked order
        ranked_labels = [entry[1] for entry in ranked]
        qid2dcg[qid] = scorer.score(ranked_labels)
        printMessage("Done", qid, qid2query[qid])

        done += 1
        if done % 20 == 0:
            # flush the rankings gathered so far and start a new batch
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeDCGResult(DCG_result_path, qid2dcg)
    writeRankingResult(ranking_result_path, qid2iid_label_score)

    # An empty query list would otherwise divide by zero and abort before the
    # result path is recorded below.
    if qid2dcg:
        mean_dcg = 1.0 * sum(qid2dcg.values()) / len(qid2dcg)
    else:
        mean_dcg = 0.0
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("average DCG@25: %f" % mean_dcg)

    result_path_file = "result/individual_result_pathes.txt"
    if not os.path.exists(result_path_file):
        makedirsforfile(result_path_file)
    # Append mode creates the file if needed; the context manager guarantees
    # the handle is closed even when the write raises.
    with open(result_path_file, 'a') as fout:
        fout.write(ranking_result_path + '\n')
Exemplo n.º 3
0
def process(options, trainCollection, devCollection):
    """Rank the dev-collection images of every query with the selected method.

    ``options.method`` selects the searcher: ``'conse'`` (``ConSE``),
    ``'t2i'``/``'ta'`` (``Text2Image``) or ``'i2t'``/``'ia'``
    (``Image2Text``).  For each query id from
    ``<rootpath>/<devCollection>/Annotations/qid.text.txt`` the searcher
    returns a score list; images are sorted by descending score and the
    ranked labels are evaluated with the scorer for ``options.metric``.  When
    a searcher returns an empty score list the query is counted as failed and
    its images are ranked in random order.  Rankings are written in batches
    of 20 queries, metric values once at the end, and the ranking result path
    is appended to ``result/individual_result_pathes.txt``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``method``,
            ``metric``, ``qrysim``, ``qrythres``, ``ntopimg``, ``ntopqry``,
            ``mincc``, ``feature``, ``k``, ``corpus``, ``word2vec``,
            ``label_source`` and, depending on the method, ``nnqueryfile``,
            ``queryclickfile``, ``nnimagefile`` and ``imageclickfile``.
        trainCollection: collection whose click/feature data the searchers
            are built from.
        devCollection: collection whose queries and images are ranked.

    Note:
        Calls ``sys.exit(0)`` when ``checkToSkip`` returns a true value for
        either result path, or when ``options.method`` is not supported.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    method = options.method
    metric = options.metric

    qrysim = options.qrysim
    qrythres = options.qrythres
    ntopimg = options.ntopimg
    ntopqry = options.ntopqry
    mincc = options.mincc
    feature = options.feature

    # semantic embedding
    k = options.k
    corpus = options.corpus
    word2vec_model = options.word2vec
    label_source = options.label_source

    # result paths
    ranking_result_path = os.path.join(
        rootpath, devCollection, 'SimilarityIndex', devCollection, 'MetaData',
        method, feature)
    DCG_result_path = os.path.join(rootpath, devCollection, metric, method,
                                   feature)
    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input: queries, each run through the query parser
    qp = SimpleQueryParser()
    qid_query_file = os.path.join(rootpath, devCollection, 'Annotations',
                                  'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)  # (qid, query)
    qid2query = dict(zip(qid_list,
                         [qp.process(query) for query in query_list]))

    # paths of the image features
    train_feat_path = os.path.join(rootpath, trainCollection, 'FeatureData',
                                   feature)
    dev_feat_path = os.path.join(rootpath, devCollection, 'FeatureData',
                                 feature)

    # method selection: build only the searcher the chosen method needs
    if method == 'conse':
        se_searcher = ConSE(label_source, corpus, word2vec_model,
                            dev_feat_path, rootpath)

    elif method == 't2i' or method == 'ta':
        nnquery_file = os.path.join(rootpath, devCollection, 'TextData',
                                    'querynn', options.nnqueryfile)
        qryClick_file = os.path.join(rootpath, trainCollection, 'TextData',
                                     options.queryclickfile)
        t2i_searcher = Text2Image(nnquery_file, qryClick_file, dev_feat_path,
                                  train_feat_path, ntopqry)

    elif method == 'i2t' or method == 'ia':
        nnimage_file = os.path.join(rootpath, devCollection, 'TextData',
                                    'imagenn', feature, options.nnimagefile)
        imgClick_file = os.path.join(rootpath, trainCollection, 'TextData',
                                     options.imageclickfile)
        i2t_searcher = Image2Text(nnimage_file, imgClick_file, qrysim, ntopimg,
                                  ntopqry)

    else:
        # Single-argument parenthesised print behaves the same on Python 2
        # and 3.
        print("this model is not supported with %s" % method)
        sys.exit(0)

    # scorer for the requested metric
    scorer = getScorer(metric)

    done = 0
    failed_count = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for query_id in qid_list:

        iid_list, label_list = readAnnotationsFrom(
            devCollection, 'concepts%s.txt' % devCollection, query_id, False,
            rootpath)

        # The unsupported-method case exited above, so exactly one of these
        # branches assigns scorelist.
        if method == 'conse':
            scorelist = se_searcher.do_search(qid2query[query_id], iid_list, k)

        elif method == 't2i':
            scorelist = t2i_searcher.text2image(query_id, iid_list, qrythres,
                                                mincc)

        elif method == 'ta':
            scorelist = t2i_searcher.textAnnotation(query_id, iid_list,
                                                    ntopimg, qrythres, mincc)

        elif method == 'i2t':
            scorelist = i2t_searcher.image2text(qid2query[query_id], iid_list,
                                                mincc)

        elif method == 'ia':
            scorelist = i2t_searcher.imageAnnotation(qid2query[query_id],
                                                     iid_list, mincc)

        # len() rather than truthiness: the searchers' return type is not
        # visible here and may not support bool().
        if len(scorelist) == 0:
            # No scores for this query: give every image score 0 and fall
            # back to a random order.
            failed_count += 1
            ranked = list(zip(iid_list, label_list, [0] * len(iid_list)))
            # list(...) keeps shuffle working where zip returns an iterator.
            random.shuffle(ranked)
        else:
            ranked = sorted(zip(iid_list, label_list, scorelist),
                            key=lambda v: v[2], reverse=True)
        qid2iid_label_score[query_id] = ranked

        # evaluate the ranking produced by the model
        qid2dcg[query_id] = scorer.score([entry[1] for entry in ranked])
        printMessage("Done", query_id, qid2query[query_id])

        done += 1
        if done % 20 == 0:
            # flush the rankings gathered so far and start a new batch
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeRankingResult(ranking_result_path, qid2iid_label_score)
    writeDCGResult(DCG_result_path, qid2dcg)
    print("number of failed query: %d" % failed_count)

    # Guard the mean: with no queries the division would raise and the result
    # path below would never be recorded.
    mean_dcg = (1.0 * sum(qid2dcg.values()) / len(qid2dcg)) if qid2dcg else 0.0
    # NOTE(review): the label says DCG@25 although the scorer follows
    # options.metric; text kept unchanged in case the output is parsed.
    print("average DCG@25: %f" % mean_dcg)

    result_path_file = "result/individual_result_pathes.txt"
    if not os.path.exists(result_path_file):
        makedirsforfile(result_path_file)
    # Mode 'a' creates the file when it is missing; 'with' closes the handle
    # even if the write fails.
    with open(result_path_file, 'a') as fout:
        fout.write(ranking_result_path + '\n')
Exemplo n.º 4
0
def process(options, trainCollection, devCollection):
    """Rank the dev-collection images of every query and evaluate the ranking.

    ``options.method`` selects the searcher: ``'se'``
    (``SemanticEmbedding``), ``'t2i'`` (``Text2Image``) or ``'i2t'``
    (``Image2Text``).  For each query id from
    ``<rootpath>/<devCollection>/Annotations/qid.text.txt`` the searcher
    returns a score list; images are sorted by descending score and the
    ranked labels are evaluated with the scorer for ``options.metric``.  A
    query whose score list is empty is counted as failed and its images are
    ranked in random order.  Rankings are written in batches of 20 queries,
    metric values once at the end, and the ranking result path is appended
    to ``result/individual_result_pathes.txt``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``method``,
            ``metric``, ``qrysim``, ``qrythres``, ``ntopimg``, ``ntopqry``,
            ``mincc``, ``feature``, ``k``, ``corpus``, ``word2vec``,
            ``label_source`` and, depending on the method, ``nnqueryfile``,
            ``queryclickfile``, ``nnimagefile`` and ``imageclickfile``.
        trainCollection: collection whose click/feature data the searchers
            are built from.
        devCollection: collection whose queries and images are ranked.

    Note:
        Calls ``sys.exit(0)`` when ``checkToSkip`` returns a true value for
        either result path, or when ``options.method`` is not supported.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    method = options.method
    metric = options.metric

    qrysim = options.qrysim
    qrythres = options.qrythres
    ntopimg = options.ntopimg
    ntopqry = options.ntopqry
    mincc = options.mincc
    feature = options.feature

    # semantic embedding
    k = options.k
    corpus = options.corpus
    word2vec_model = options.word2vec
    label_source = options.label_source

    # result paths
    ranking_result_path = os.path.join(rootpath, devCollection,
                                       'SimilarityIndex', devCollection,
                                       'MetaData', method, feature)
    # NOTE(review): the directory is hard-coded to 'DCG' while the scorer
    # below follows options.metric -- confirm this is intended.
    DCG_result_path = os.path.join(rootpath, devCollection, 'DCG', method,
                                   feature)
    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input: queries, each run through the query parser
    qp = SimpleQueryParser()
    qid_query_file = os.path.join(rootpath, devCollection, 'Annotations',
                                  'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)  # (qid, query)
    parsed_queries = [qp.process(query) for query in query_list]
    qid2query = dict(zip(qid_list, parsed_queries))

    # paths of the image features
    train_feat_path = os.path.join(rootpath, trainCollection, 'FeatureData',
                                   feature)
    dev_feat_path = os.path.join(rootpath, devCollection, 'FeatureData',
                                 feature)

    # method selection: build only the searcher the chosen method needs
    if method == 'se':
        se_searcher = SemanticEmbedding(label_source, corpus, word2vec_model,
                                        dev_feat_path, rootpath)

    elif method == 't2i':
        nnquery_file = os.path.join(rootpath, devCollection, 'TextData',
                                    'querynn', options.nnqueryfile)
        qryClick_file = os.path.join(rootpath, trainCollection, 'TextData',
                                     options.queryclickfile)
        t2i_searcher = Text2Image(nnquery_file, qryClick_file, dev_feat_path,
                                  train_feat_path, ntopqry)

    elif method == 'i2t':
        nnimage_file = os.path.join(rootpath, devCollection, 'TextData',
                                    'imagenn', feature, options.nnimagefile)
        imgClick_file = os.path.join(rootpath, trainCollection, 'TextData',
                                     options.imageclickfile)
        i2t_searcher = Image2Text(nnimage_file, imgClick_file, qrysim, ntopimg,
                                  ntopqry)

    else:
        # Single-argument parenthesised print behaves the same on Python 2
        # and 3.
        print("this model is not supported with %s" % method)
        sys.exit(0)

    # scorer for the requested metric
    scorer = getScorer(metric)

    done = 0
    failed_count = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for query_id in qid_list:

        iid_list, label_list = readAnnotationsFrom(
            devCollection, 'concepts%s.txt' % devCollection, query_id, False,
            rootpath)

        # The unsupported-method case exited above, so exactly one of these
        # branches assigns scorelist.
        if method == 'se':
            scorelist = se_searcher.do_search(qid2query[query_id], iid_list, k)

        elif method == 't2i':
            scorelist = t2i_searcher.text2image(query_id, iid_list, qrythres,
                                                mincc)

        elif method == 'i2t':
            scorelist = i2t_searcher.image2text(qid2query[query_id], iid_list,
                                                mincc)

        # len() rather than truthiness: the searchers' return type is not
        # visible here and may not support bool().
        if len(scorelist) == 0:
            # No scores for this query: give every image score 0 and fall
            # back to a random order.
            failed_count += 1
            zero_scores = [0] * len(iid_list)
            # list(...) keeps shuffle working where zip returns an iterator.
            ranked = list(zip(iid_list, label_list, zero_scores))
            random.shuffle(ranked)
        else:
            ranked = sorted(zip(iid_list, label_list, scorelist),
                            key=lambda v: v[2],
                            reverse=True)
        qid2iid_label_score[query_id] = ranked

        # evaluate the ranking produced by the model
        qid2dcg[query_id] = scorer.score([entry[1] for entry in ranked])
        printMessage("Done", query_id, qid2query[query_id])

        done += 1
        if done % 20 == 0:
            # flush the rankings gathered so far and start a new batch
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeRankingResult(ranking_result_path, qid2iid_label_score)
    writeDCGResult(DCG_result_path, qid2dcg)
    print("number of failed query: %d" % failed_count)

    # An empty query list would otherwise divide by zero and abort before the
    # result path is recorded below.
    if qid2dcg:
        mean_dcg = 1.0 * sum(qid2dcg.values()) / len(qid2dcg)
    else:
        mean_dcg = 0.0
    print("average DCG@25: %f" % mean_dcg)

    result_path_file = "result/individual_result_pathes.txt"
    if not os.path.exists(result_path_file):
        makedirsforfile(result_path_file)
    # Append mode creates the file if needed; the context manager guarantees
    # the handle is closed even when the write raises.
    with open(result_path_file, 'a') as fout:
        fout.write(ranking_result_path + '\n')
Exemplo n.º 5
0
def process(options, trainCollection, devCollection):
    """Rank the dev-collection images of every query with ``Text2Image``.

    For each query id from
    ``<rootpath>/<devCollection>/Annotations/qid.text.txt`` the searcher's
    ``doSearch`` returns a score list.  When it is non-empty the images are
    sorted by descending score and the ranked labels are evaluated with the
    scorer for ``options.metric``.  When it is empty the query is counted as
    failed, no ranking is stored for it, and its metric value is taken from
    the per-query random-performance file.  Rankings are written in batches
    of 20 queries and the metric values once at the end.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``metric``,
            ``qrythres``, ``ntopimg``, ``ntopqry``, ``mincc``, ``feature``,
            ``nnqueryfile`` and ``queryclickfile``.
        trainCollection: collection whose click/feature data the searcher is
            built from.
        devCollection: collection whose queries and images are ranked.

    Raises:
        KeyError: if a failed query has no entry in the random-performance
            file.

    Note:
        Calls ``sys.exit(0)`` when ``checkToSkip`` returns a true value for
        either result path.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    metric = options.metric

    qrythres = options.qrythres
    ntopimg = options.ntopimg
    ntopqry = options.ntopqry
    mincc = options.mincc
    feature = options.feature

    # result paths
    ranking_result_path = os.path.join(
        rootpath, devCollection, 'SimilarityIndex', devCollection, 'MetaData',
        'text2image', feature)
    DCG_result_path = os.path.join(rootpath, devCollection, metric,
                                   'text2image', feature)
    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input: queries, each run through the query parser
    qp = SimpleQueryParser()
    qid_query_file = os.path.join(rootpath, devCollection, 'Annotations',
                                  'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)  # (qid, query)
    qid2query = dict(zip(qid_list,
                         [qp.process(query) for query in query_list]))

    # random performance for specific queries
    # NOTE(review): the file name below looks redacted/garbled in this copy
    # of the source -- restore the real name before running.
    qid_randomperf_file = os.path.join(rootpath, devCollection, 'Annotations', '*****@*****.**')
    qid2randomperf = {}
    # 'with' closes the file once it has been read.
    with open(qid_randomperf_file) as perf_file:
        for line in perf_file:
            fields = line.strip()
            if not fields:
                # tolerate blank lines instead of failing on the unpack below
                continue
            qid, random_perf = fields.split()
            qid2randomperf[qid] = float(random_perf)

    # paths of the image features
    train_feat_path = os.path.join(rootpath, trainCollection, 'FeatureData',
                                   feature)
    dev_feat_path = os.path.join(rootpath, devCollection, 'FeatureData',
                                 feature)

    nnquery_file = os.path.join(rootpath, devCollection, 'TextData',
                                'querynn', options.nnqueryfile)
    qryClick_file = os.path.join(rootpath, trainCollection, 'TextData',
                                 options.queryclickfile)
    t2i_searcher = Text2Image(nnquery_file, qryClick_file, dev_feat_path,
                              train_feat_path, ntopqry)

    # scorer for the requested metric
    scorer = getScorer(metric)

    done = 0
    failed_count = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for query_id in qid_list:

        iid_list, label_list = readAnnotationsFrom(
            devCollection, 'concepts%s.txt' % devCollection, query_id, False,
            rootpath)

        scorelist = t2i_searcher.doSearch(query_id, iid_list, ntopimg,
                                          qrythres, mincc)

        # len() rather than truthiness: the searcher's return type is not
        # visible here and may not support bool().
        if len(scorelist) == 0:
            # No scores: use the precomputed random performance and store no
            # ranking for this query.
            failed_count += 1
            qid2dcg[query_id] = qid2randomperf[query_id]
        else:
            ranked = sorted(zip(iid_list, label_list, scorelist),
                            key=lambda v: v[2], reverse=True)
            qid2iid_label_score[query_id] = ranked
            # evaluate the ranking produced by the model
            qid2dcg[query_id] = scorer.score([entry[1] for entry in ranked])
        printMessage("Done", query_id, qid2query[query_id])

        done += 1
        if done % 20 == 0:
            # flush the rankings gathered so far and start a new batch
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeRankingResult(ranking_result_path, qid2iid_label_score)
    writeDCGResult(DCG_result_path, qid2dcg)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("number of failed query: %d" % failed_count)

    # Guard the mean so an empty query list does not raise ZeroDivisionError.
    mean_dcg = (1.0 * sum(qid2dcg.values()) / len(qid2dcg)) if qid2dcg else 0.0
    # NOTE(review): the label says DCG@25 although the scorer follows
    # options.metric; text kept unchanged in case the output is parsed.
    print("average DCG@25: %f" % mean_dcg)