Пример #1
0
 def __init__(self, ranker, prels, anserini, index, topn=10):
     """Initialise the expander and keep references to its inputs.

     ``ranker``, ``prels``, ``anserini`` and ``index`` are stored on the
     instance unchanged. ``self.f`` starts out as ``None``. The base
     ``AbstractQExpander`` is initialised with ``replace=False`` and the
     given ``topn``.
     """
     AbstractQExpander.__init__(self, topn=topn, replace=False)
     self.ranker = ranker
     self.prels = prels
     self.anserini = anserini
     self.index = index
     # Nothing is assigned to ``f`` at construction time.
     self.f = None
Пример #2
0
def _star_score(df, idx, model_name, base_model_name, ranker_names, metrics):
    """Return the sum of squared scores of a model on one row, or ``None``.

    Every ``<model_name>.<ranker>.<metric>`` cell of row ``idx`` is compared
    with the matching ``<base_model_name>.<ranker>.<metric>`` cell; missing
    (NaN) values count as 0. As soon as one score is not strictly greater
    than the baseline the model is disqualified and ``None`` is returned.
    With no ranker/metric pairs at all, the result is 0 (not ``None``).
    """
    total = 0
    for ranker_name in ranker_names:
        for metric in metrics:
            v = df.loc[idx, '{}.{}.{}'.format(model_name, ranker_name,
                                              metric)]
            v = 0 if pd.isna(v) else v
            v0 = df.loc[idx, '{}.{}.{}'.format(base_model_name, ranker_name,
                                               metric)]
            v0 = 0 if pd.isna(v0) else v0
            if v <= v0:
                # One non-improving score is enough to reject the model, so
                # there is no point in looking at the remaining columns.
                return None
            total += v**2
    return total


def build(input, expanders, rankers, metrics, output):
    """Write the per-query dataset of expanders that beat the baseline.

    Reads the CSV at ``input`` and, for every row, collects the "star"
    expanders: those whose score is strictly greater than the score of the
    base ``AbstractQExpander`` model for every ranker/metric combination.
    Star expanders are ranked by the sum of their squared scores (highest
    first) and written as ``method.<i>``, ``metric.<i>`` (square root of that
    sum) and ``query.<i>`` (the cell of the column named after the model)
    columns, next to a ``star_model_count`` column.

    Args:
        input: Path (or anything ``pd.read_csv`` accepts) of the source CSV.
        expanders: Expander objects exposing ``get_model_name()``; they are
            used as dict keys, so they must be hashable.
        rankers: Sequence of rankers, named via ``ef.get_ranker_name``.
        metrics: Sequence of metric name strings.
        output: Prefix of the generated file name.

    Returns:
        The name of the CSV file written:
        ``<output>.<ranker names>.<metrics>.dataset.csv``.
    """
    base_model_name = AbstractQExpander().get_model_name()
    ranker_names = [ef.get_ranker_name(r) for r in rankers]
    df = pd.read_csv(input)
    # The leading columns hold the original query info. Take an explicit copy
    # so the columns added below are written to an independent frame instead
    # of a slice of ``df`` (avoids pandas' chained-assignment warning).
    ds_df = df.iloc[:, :1 + 1 + len(rankers) * len(metrics)].copy()
    ds_df['star_model_count'] = 0
    for idx in df.index:
        star_models = {}
        for model in expanders:
            model_name = model.get_model_name()
            if model_name == base_model_name:
                continue
            score = _star_score(df, idx, model_name, base_model_name,
                                ranker_names, metrics)
            # ``0`` is a legitimate score, so test against None explicitly.
            if score is not None:
                star_models[model] = score

        ds_df.loc[idx, 'star_model_count'] = len(star_models)
        # sorted() is stable, so ties keep the order of ``expanders``.
        ranked = sorted(star_models.items(),
                        key=lambda item: item[1],
                        reverse=True)
        for position, (star_model, score) in enumerate(ranked, start=1):
            star_name = star_model.get_model_name()
            ds_df.loc[idx, '{}.{}'.format('method', position)] = star_name
            ds_df.loc[idx, '{}.{}'.format('metric',
                                          position)] = math.sqrt(score)
            ds_df.loc[idx, '{}.{}'.format('query', position)] = df.loc[
                idx, '{}'.format(star_name)]
    filename = '{}.{}.{}.dataset.csv'.format(output, '.'.join(ranker_names),
                                             '.'.join(metrics))
    ds_df.to_csv(filename, index=False)
    return filename
Пример #3
0
def get_nrf_expanders():
    """Return the fixed list of expanders that do not need relevance feedback.

    The list starts with the base ``AbstractQExpander``, followed by every
    expander in its default mode, then the same expanders built with
    ``replace=True``, and finally one ``Stem`` wrapper per stemmer.
    """
    word2vec_vectors = '../pre/wiki-news-300d-1M.vec'
    glove_vectors = '../pre/glove.6B.300d'
    anchor_text = '../pre/anchor_text_en.ttl'
    anchor_vectors = '../pre/wiki-anchor-text-en-ttl-300d.vec'
    wiki_model = '../pre/temp_model_Wiki'

    expanders = [AbstractQExpander()]

    # Expanders constructed with their default ``replace`` setting.
    expanders += [
        Thesaurus(),
        Wordnet(),
        Word2Vec(word2vec_vectors),
        Glove(glove_vectors),
        Anchor(anchorfile=anchor_text, vectorfile=anchor_vectors),
        Wiki(wiki_model),
        Tagmee(),
        SenseDisambiguation(),
        Conceptnet(),
    ]

    # The same expanders again, this time with ``replace=True``.
    expanders += [
        Thesaurus(replace=True),
        Wordnet(replace=True),
        Word2Vec(word2vec_vectors, replace=True),
        Glove(glove_vectors, replace=True),
        Anchor(anchorfile=anchor_text,
               vectorfile=anchor_vectors,
               replace=True),
        Wiki(wiki_model, replace=True),
        Tagmee(replace=True),
        SenseDisambiguation(replace=True),
        Conceptnet(replace=True),
    ]

    # Stemming-based expanders.
    expanders += [
        Stem(KrovetzStemmer(jarfile='stemmers/kstem-3.4.jar')),
        Stem(LovinsStemmer()),
        Stem(PaiceHuskStemmer()),
        Stem(PorterStemmer()),
        Stem(Porter2Stemmer()),
        Stem(SRemovalStemmer()),
        Stem(Trunc4Stemmer()),
        Stem(Trunc5Stemmer()),
    ]

    # RF is not added here: it needs an index and search output, which depend
    # on the IR method and the topics database, so it is run individually.
    # RF assumes that the abstractqueryexpansion files already exist.
    return expanders
Пример #4
0
 def __init__(self, replace=False):
     """Create the expander, handing ``replace`` to ``AbstractQExpander``."""
     AbstractQExpander.__init__(self, replace=replace)
Пример #5
0
 def __init__(self, vectorfile, replace=False, topn=3):
     """Create the expander and register the vector file to use.

     ``replace`` and ``topn`` are passed on to ``AbstractQExpander``. The
     remaining state is written to the ``Word2Vec`` name itself rather than
     to ``self``: ``Word2Vec.vectorfile`` is set to ``vectorfile`` and
     ``Word2Vec.word2vec`` is reset to ``None`` on every construction.
     """
     AbstractQExpander.__init__(self, replace=replace, topn=topn)
     # NOTE(review): presumably the model is loaded lazily from
     # ``vectorfile`` elsewhere and shared between instances -- confirm.
     Word2Vec.word2vec = None
     Word2Vec.vectorfile = vectorfile
Пример #6
0
 def __init__(self, vectorfile, replace=False, topn=3):
     """Create the expander and register the vector file to use.

     ``replace`` and ``topn`` are passed on to ``AbstractQExpander``. The
     remaining state is written to the ``Glove`` name itself rather than to
     ``self``: ``Glove.vectorfile`` is set to ``vectorfile`` and
     ``Glove.glove`` is reset to ``None`` on every construction.
     """
     AbstractQExpander.__init__(self, replace=replace, topn=topn)
     # NOTE(review): presumably the vectors are loaded lazily from
     # ``vectorfile`` elsewhere and shared between instances -- confirm.
     Glove.glove = None
     Glove.vectorfile = vectorfile
Пример #7
0
 def __init__(self, replace=False, topn=3):
     """Create the expander, forwarding both options to ``AbstractQExpander``."""
     AbstractQExpander.__init__(self, replace=replace, topn=topn)
Пример #8
0
def get_nrf_expanders():
    """Return the expanders switched on in ``param.ReQue['expanders']``.

    The list always starts with the base ``AbstractQExpander``. Each enabled
    non-stemming expander contributes two instances, the default one
    followed by one built with ``replace=True``; each enabled stemmer
    contributes a single ``Stem`` wrapper. Expander modules are imported
    only when their flag is set.
    """
    enabled = param.ReQue['expanders']
    selected = [AbstractQExpander()]

    if enabled['Thesaurus']:
        from expanders.thesaurus import Thesaurus
        selected += [Thesaurus(), Thesaurus(replace=True)]

    if enabled['Wordnet']:
        from expanders.wordnet import Wordnet
        selected += [Wordnet(), Wordnet(replace=True)]

    if enabled['Word2Vec']:
        from expanders.word2vec import Word2Vec
        word2vec_vectors = '../pre/wiki-news-300d-1M.vec'
        selected += [
            Word2Vec(word2vec_vectors),
            Word2Vec(word2vec_vectors, replace=True),
        ]

    if enabled['Glove']:
        from expanders.glove import Glove
        glove_vectors = '../pre/glove.6B.300d'
        selected += [
            Glove(glove_vectors),
            Glove(glove_vectors, replace=True),
        ]

    if enabled['Anchor']:
        from expanders.anchor import Anchor
        anchor_text = '../pre/anchor_text_en.ttl'
        anchor_vectors = '../pre/wiki-anchor-text-en-ttl-300d.vec'
        selected += [
            Anchor(anchorfile=anchor_text, vectorfile=anchor_vectors),
            Anchor(anchorfile=anchor_text,
                   vectorfile=anchor_vectors,
                   replace=True),
        ]

    if enabled['Wiki']:
        from expanders.wiki import Wiki
        wiki_model = '../pre/temp_model_Wiki'
        selected += [Wiki(wiki_model), Wiki(wiki_model, replace=True)]

    if enabled['Tagmee']:
        from expanders.tagmee import Tagmee
        selected += [Tagmee(), Tagmee(replace=True)]

    if enabled['SenseDisambiguation']:
        from expanders.sensedisambiguation import SenseDisambiguation
        selected += [SenseDisambiguation(), SenseDisambiguation(replace=True)]

    if enabled['Conceptnet']:
        from expanders.conceptnet import Conceptnet
        selected += [Conceptnet(), Conceptnet(replace=True)]

    # Stemmers: one ``Stem`` wrapper each.
    if enabled['KrovetzStemmer']:
        from stemmers.krovetz import KrovetzStemmer
        krovetz = KrovetzStemmer(jarfile='stemmers/kstem-3.4.jar')
        selected.append(Stem(krovetz))

    if enabled['LovinsStemmer']:
        from stemmers.lovins import LovinsStemmer
        selected.append(Stem(LovinsStemmer()))

    if enabled['PaiceHuskStemmer']:
        from stemmers.paicehusk import PaiceHuskStemmer
        selected.append(Stem(PaiceHuskStemmer()))

    if enabled['PorterStemmer']:
        from stemmers.porter import PorterStemmer
        selected.append(Stem(PorterStemmer()))

    if enabled['Porter2Stemmer']:
        from stemmers.porter2 import Porter2Stemmer
        selected.append(Stem(Porter2Stemmer()))

    if enabled['SRemovalStemmer']:
        from stemmers.sstemmer import SRemovalStemmer
        selected.append(Stem(SRemovalStemmer()))

    if enabled['Trunc4Stemmer']:
        from stemmers.trunc4 import Trunc4Stemmer
        selected.append(Stem(Trunc4Stemmer()))

    if enabled['Trunc5Stemmer']:
        from stemmers.trunc5 import Trunc5Stemmer
        selected.append(Stem(Trunc5Stemmer()))

    # RF is not added here: it needs an index and search output, which depend
    # on the IR method and the topics corpora, so it is run individually.
    # RF assumes that the abstractqueryexpansion files already exist.
    return selected