Пример #1
0
 def __init__(self, field='contents'):
     """Create the Java-side ``DocSize`` feature object.

     Parameters
     ----------
     field : str
         Forwarded unchanged to the ``io.anserini.ltr.feature.DocSize``
         constructor. Defaults to ``'contents'``.
     """
     # Resolve the Java class and instantiate it in a single expression.
     self.extractor = autoclass('io.anserini.ltr.feature.DocSize')(field)
Пример #2
0
 def __init__(self, field='contents', qfield='analyzed'):
     """Create the Java-side ``ProbalitySum`` feature object.

     Parameters
     ----------
     field : str
         First constructor argument. Defaults to ``'contents'``.
     qfield : str
         Second constructor argument. Defaults to ``'analyzed'``.
     """
     # The class is looked up by this exact string, so the spelling
     # 'ProbalitySum' is kept verbatim (presumably it mirrors the Java
     # class name -- verify before "correcting" it).
     java_cls = autoclass('io.anserini.ltr.feature.ProbalitySum')
     self.extractor = java_cls(field, qfield)
Пример #3
0
 def __init__(self, path, field, tag, qfield):
     """Create the Java-side ``IbmModel1`` feature object.

     All four arguments are required and are handed to the
     ``io.anserini.ltr.feature.IbmModel1`` constructor in the order
     ``path, field, tag, qfield``.
     """
     ibm_model1_cls = autoclass('io.anserini.ltr.feature.IbmModel1')
     ctor_args = (path, field, tag, qfield)
     self.extractor = ibm_model1_cls(*ctor_args)
Пример #4
0
 def __init__(self, index_dir, worker_num=1):
     """Set up the Java ``FeatureExtractorUtils`` helper and an empty name list.

     Parameters
     ----------
     index_dir
         First argument given to the Java constructor.
     worker_num : int
         Second argument given to the Java constructor. Defaults to ``1``.
     """
     utils_cls = autoclass('io.anserini.ltr.FeatureExtractorUtils')
     self.utils = utils_cls(index_dir, worker_num)
     # Starts empty; nothing in this constructor populates it.
     self.feature_name = list()
Пример #5
0
 def __init__(self, field='contents', qfield='analyzed'):
     """Create the Java-side ``NormalizedTfIdf`` feature object.

     Parameters
     ----------
     field : str
         First constructor argument. Defaults to ``'contents'``.
     qfield : str
         Second constructor argument. Defaults to ``'analyzed'``.
     """
     self.extractor = autoclass('io.anserini.ltr.feature.NormalizedTfIdf')(field, qfield)
Пример #6
0
 def __init__(self, field='contents', qfield='analyzed'):
     """Create the Java-side ``QueryCoverageRatio`` feature object.

     Parameters
     ----------
     field : str
         First constructor argument. Defaults to ``'contents'``.
     qfield : str
         Second constructor argument. Defaults to ``'analyzed'``.
     """
     coverage_cls = autoclass('io.anserini.ltr.feature.QueryCoverageRatio')
     self.extractor = coverage_cls(field, qfield)
Пример #7
0
"""

import logging
from typing import Dict, List, Optional, Union

from ._base import Document, JQuery, JQueryGenerator
from pyserini.pyclass import autoclass, JFloat, JArrayList, JHashMap, JString
from pyserini.trectools import TrecRun
from pyserini.fusion import FusionMethod, reciprocal_rank_fusion
from pyserini.util import download_prebuilt_index, get_sparse_indexes_info

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Handles to Anserini's Java classes, resolved by fully-qualified name via ``autoclass``.
JSimpleSearcher = autoclass('io.anserini.search.SimpleSearcher')
# ``$Result`` selects the ``Result`` class nested inside ``SimpleSearcher``.
JSimpleSearcherResult = autoclass('io.anserini.search.SimpleSearcher$Result')


class SimpleSearcher:
    """Python-side handle on Anserini's ``SimpleSearcher``.

    Parameters
    ----------
    index_dir : str
        Path to Lucene index directory.
    """

    def __init__(self, index_dir: str):
        """Remember ``index_dir`` and build the underlying Java searcher from it."""
        self.index_dir = index_dir
        # The path is wrapped in a Java string before it reaches the constructor.
        java_path = JString(index_dir)
        self.object = JSimpleSearcher(java_path)
Пример #8
0
 def __init__(self):
     """Create the Java-side ``MaxMinRatioPooler`` with its no-argument constructor."""
     self.extractor = autoclass('io.anserini.ltr.MaxMinRatioPooler')()
Пример #9
0
 def __init__(self, sublinear, pooler, field='contents', qfield='analyzed'):
     """Create the Java-side ``TfIdfStat`` feature object.

     Parameters
     ----------
     sublinear
         Wrapped in a ``java.lang.Boolean`` before being passed on.
     pooler
         Object whose ``extractor`` attribute is passed to the Java
         constructor.
     field : str
         Third constructor argument. Defaults to ``'contents'``.
     qfield : str
         Fourth constructor argument. Defaults to ``'analyzed'``.
     """
     stat_cls = autoclass('io.anserini.ltr.feature.TfIdfStat')
     boolean_cls = autoclass('java.lang.Boolean')
     # Box the flag explicitly so the Java side receives a Boolean object.
     boxed_sublinear = boolean_cls(sublinear)
     java_pooler = pooler.extractor
     self.extractor = stat_cls(boxed_sublinear, java_pooler, field, qfield)
Пример #10
0
 def __init__(self):
     """Create the Java-side ``MedianPooler`` with its no-argument constructor."""
     self.extractor = autoclass('io.anserini.ltr.MedianPooler')()
Пример #11
0
 def __init__(self):
     """Create the Java-side ``ConfidencePooler`` with its no-argument constructor."""
     self.extractor = autoclass('io.anserini.ltr.ConfidencePooler')()
Пример #12
0
 def __init__(self, gap=8, field='contents', qfield='analyzed'):
     """Create the Java-side ``OrderedQueryPairs`` feature object.

     Parameters
     ----------
     gap : int
         First constructor argument. Defaults to ``8``.
     field : str
         Second constructor argument. Defaults to ``'contents'``.
     qfield : str
         Third constructor argument. Defaults to ``'analyzed'``.
     """
     pairs_cls = autoclass('io.anserini.ltr.feature.OrderedQueryPairs')
     self.extractor = pairs_cls(gap, field, qfield)
Пример #13
0
 def qld(mu=1000):
     """Return a new Lucene ``LMDirichletSimilarity`` built with ``mu``.

     Parameters
     ----------
     mu
         Sole constructor argument. Defaults to ``1000``.
     """
     similarity_cls = autoclass('org.apache.lucene.search.similarities.LMDirichletSimilarity')
     return similarity_cls(mu)
Пример #14
0
 def bm25(k1=0.9, b=0.4):
     """Return a new Lucene ``BM25Similarity`` built with ``k1`` and ``b``.

     Parameters
     ----------
     k1 : float
         First constructor argument. Defaults to ``0.9``.
     b : float
         Second constructor argument. Defaults to ``0.4``.
     """
     similarity_cls = autoclass('org.apache.lucene.search.similarities.BM25Similarity')
     return similarity_cls(k1, b)
Пример #15
0
 def __init__(self, qfield='analyzed'):
     """Create the Java-side ``QueryLength`` feature object.

     Parameters
     ----------
     qfield : str
         Sole constructor argument. Defaults to ``'analyzed'``.
     """
     self.extractor = autoclass('io.anserini.ltr.feature.QueryLength')(qfield)
Пример #16
0
 def __init__(self, pooler, k1=0.9, b=0.4, field='contents', qfield='analyzed'):
     """Create the Java-side ``BM25Stat`` feature object.

     Parameters
     ----------
     pooler
         Object whose ``extractor`` attribute is passed as the first
         constructor argument.
     k1 : float
         Second constructor argument. Defaults to ``0.9``.
     b : float
         Third constructor argument. Defaults to ``0.4``.
     field : str
         Fourth constructor argument. Defaults to ``'contents'``.
     qfield : str
         Fifth constructor argument. Defaults to ``'analyzed'``.
     """
     stat_cls = autoclass('io.anserini.ltr.feature.BM25Stat')
     java_pooler = pooler.extractor
     self.extractor = stat_cls(java_pooler, k1, b, field, qfield)
Пример #17
0
 def __init__(self, field='contents', qfield='analyzed'):
     """Create the Java-side ``SumMatchingTF`` feature object.

     Parameters
     ----------
     field : str
         First constructor argument. Defaults to ``'contents'``.
     qfield : str
         Second constructor argument. Defaults to ``'analyzed'``.
     """
     matching_tf_cls = autoclass('io.anserini.ltr.feature.SumMatchingTF')
     self.extractor = matching_tf_cls(field, qfield)
Пример #18
0
 def __init__(self, pooler, field='contents', qfield='analyzed'):
     """Create the Java-side ``DfrGl2Stat`` feature object.

     Parameters
     ----------
     pooler
         Object whose ``extractor`` attribute is passed as the first
         constructor argument.
     field : str
         Second constructor argument. Defaults to ``'contents'``.
     qfield : str
         Third constructor argument. Defaults to ``'analyzed'``.
     """
     stat_cls = autoclass('io.anserini.ltr.feature.DfrGl2Stat')
     java_pooler = pooler.extractor
     self.extractor = stat_cls(java_pooler, field, qfield)
Пример #19
0
 def __init__(self, filename, tag):
     """Create the Java-side ``RunList`` feature object.

     Both arguments are required and are passed to the
     ``io.anserini.ltr.feature.RunList`` constructor as ``filename, tag``.
     """
     run_list_cls = autoclass('io.anserini.ltr.feature.RunList')
     self.extractor = run_list_cls(filename, tag)
Пример #20
0
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import json
import math
import struct
import subprocess
import sys
sys.path.append('.')
from multiprocessing.pool import ThreadPool
from pyserini.pyclass import autoclass, JString
from typing import List, Set, Dict

# Java class handles resolved via ``autoclass`` from their fully-qualified names.
JSimpleSearcher = autoclass('io.anserini.search.SimpleSearcher')
# NOTE: despite the short name, this is bound to ``IndexReaderUtils``.
JIndexReader = autoclass('io.anserini.index.IndexReaderUtils')
JTerm = autoclass('org.apache.lucene.index.Term')

# Numeric tuning constants; the code that consumes them is not visible here.
# NOTE(review): the names suggest a self-translation probability, a minimum
# probability cutoff, a smoothing lambda, and a floor on collection
# probability -- confirm against the code that uses them.
SELF_TRAN = 0.35
MIN_PROB = 0.0025
LAMBDA_VALUE = 0.3
MIN_COLLECT_PROB = 1e-9


def normalize(scores: List[float]):
    low = min(scores)
    high = max(scores)
    width = high - low
    if width != 0:
        return [(s - low) / width for s in scores]