示例#1
0
def test_nrt():
    """Check counts and ``current`` flags of an ``nrt=True`` indexer.

    Walks through add -> refresh -> add -> commit and asserts what the
    indexer itself and a separately opened searcher report at each step.
    """
    nrt_indexer = engine.Indexer(nrt=True)

    # A document that was only added is not counted yet, and the indexer
    # reports itself as not current.
    nrt_indexer.add()
    assert nrt_indexer.count() == 0
    assert not nrt_indexer.current

    # After a refresh the document is counted and the indexer is current.
    nrt_indexer.refresh()
    assert nrt_indexer.count() == 1
    assert nrt_indexer.current

    # A searcher opened on the same directory counts nothing at this point.
    directory_searcher = engine.IndexSearcher(nrt_indexer.directory)
    assert directory_searcher.count() == 0
    assert directory_searcher.current

    # Once committed, both the indexer and a newly opened searcher count two.
    nrt_indexer.add()
    nrt_indexer.commit()
    indexer_total = nrt_indexer.count()
    reopened_total = engine.IndexSearcher(nrt_indexer.directory).count()
    assert indexer_total == reopened_total == 2
示例#2
0
 def __init__(self, *directories, **kwargs):
     """Open a searcher over the given index directories.

     With at most one directory, the positional arguments are unpacked into
     ``engine.IndexSearcher``; with two or more, the whole tuple is passed
     as a single argument to ``engine.MultiSearcher``.  Keyword arguments
     are forwarded to whichever searcher is constructed.
     """
     if len(directories) <= 1:
         opened = engine.IndexSearcher(*directories, **kwargs)
     else:
         opened = engine.MultiSearcher(directories, **kwargs)
     self.searcher = opened
     # Timestamp is taken after the searcher has been opened.
     self.updated = time.time()
     self.query_map = {}
示例#3
0
def test_indexes(tempdir):
    """Exercise index construction, merging, deletion, snapshots and checks.

    ``tempdir`` is used as an on-disk index location: it is passed to
    ``engine.Indexer`` and to ``os.listdir`` / ``os.path.join``.
    """
    # A searcher cannot be constructed without arguments.
    with pytest.raises(TypeError):
        engine.IndexSearcher()
    # Opening ``tempdir`` in 'r' mode fails at this point
    # (presumably because no index has been written there yet -- confirm).
    with pytest.raises(lucene.JavaError):
        engine.Indexer(tempdir, 'r')
    # Indexer created without a directory argument; two fields are registered.
    indexer = engine.Indexer()
    indexer.set('name', engine.Field.String, stored=True)
    indexer.set('text', engine.Field.Text)
    # Add one empty document to an index at ``tempdir`` via a context manager.
    with engine.Indexer(tempdir) as temp:
        temp.add()
    # Adding a document with a field that was never registered raises KeyError;
    # the plain ``add()`` before it runs first.
    with pytest.raises(KeyError), engine.Indexer(tempdir) as temp:
        temp.add()
        temp.add(missing='')
    # ``temp`` is still bound after its ``with`` block. ``+=`` is applied with
    # an indexer object, its directory, and the raw path in turn.
    for other in (temp, temp.directory, tempdir):
        indexer += other
    assert len(indexer) == 3
    # Field values are given as the result of ``analyzer.tokens`` and as a
    # ``BytesRef`` rather than plain strings; the stored name reads back as '{}'.
    analyzer = engine.Analyzer.whitespace()
    indexer.add(text=analyzer.tokens('?'), name=util.BytesRef('{}'))
    indexer.commit()
    assert indexer[next(indexer.docs('text', '?'))]['name'] == '{}'
    # Delete by term; after ``commit(merge=True)`` no deletions remain.
    indexer.delete('text', '?')
    indexer.commit(merge=True)
    assert not indexer.hasDeletions()
    # After ``commit(merge=1)`` exactly one reader is listed.
    indexer.commit(merge=1)
    assert len(list(indexer.readers)) == 1
    # Once the wrapped ``indexReader`` / ``indexSearcher`` attribute is deleted,
    # accessing ``maxDoc`` / ``search`` raises AttributeError
    # (presumably those names are delegated to the wrapped object -- confirm).
    reader = engine.indexers.IndexReader(indexer.indexReader)
    del reader.indexReader
    with pytest.raises(AttributeError):
        reader.maxDoc
    del indexer.indexSearcher
    with pytest.raises(AttributeError):
        indexer.search

    # Snapshot section: rebind ``indexer`` to an on-disk index at ``tempdir``.
    indexer = engine.Indexer(tempdir)
    indexer.add()
    indexer.commit()
    # Directory listing captured before the snapshot and merge below.
    files = set(os.listdir(tempdir))
    path = os.path.join(tempdir, 'temp')
    with indexer.snapshot() as commit:
        # A further commit advances the generation past the snapshot's.
        indexer.commit(merge=1)
        assert indexer.indexCommit.generation > commit.generation
        engine.indexers.copy(commit, path)
        # The copy holds exactly the snapshot's files, which are a strict subset
        # of the earlier listing, itself a strict subset of the current listing.
        assert set(os.listdir(path)) == set(commit.fileNames) < files < set(
            os.listdir(tempdir))
        # Replace the copied segments file with an empty file; copying the same
        # commit into ``path`` again is then expected to raise OSError.
        filepath = os.path.join(path, commit.segmentsFileName)
        os.remove(filepath)
        open(filepath, 'w').close()
        with pytest.raises(OSError):
            engine.indexers.copy(commit, path)
    # ``check`` on ``tempdir`` fails while this indexer is still alive
    # (presumably it still holds the directory's write lock -- confirm).
    with pytest.raises(lucene.JavaError):
        indexer.check(tempdir)
    # Drop the indexer, then verify the directory can be opened again.
    del indexer
    assert engine.Indexer(tempdir)
    # ``commit`` is still bound after its ``with`` block; its segments file is
    # no longer present in ``tempdir``.
    assert not os.path.exists(os.path.join(tempdir, commit.segmentsFileName))
    assert engine.IndexWriter.check(tempdir).clean
    assert not engine.IndexWriter.check(tempdir, fix=True).numBadSegments
示例#4
0
 def __init__(self, *directories, **kwargs):
     """Set up the URL queue and open a searcher over ``directories``.

     The optional ``urls`` keyword is removed from ``kwargs`` and stored as a
     deque; the remaining keyword arguments are forwarded to the searcher.
     Two or more directories are passed as one tuple to
     ``engine.MultiSearcher``; otherwise they are unpacked into
     ``engine.IndexSearcher``.
     """
     pending_urls = kwargs.pop('urls', ())
     self.urls = collections.deque(pending_urls)
     if self.urls:
         # Open and immediately close a writer before the searcher is built
         # (presumably so the index exists when opened -- confirm).
         engine.IndexWriter(*directories).close()
     if len(directories) > 1:
         self.searcher = engine.MultiSearcher(directories, **kwargs)
     else:
         self.searcher = engine.IndexSearcher(*directories, **kwargs)
     # Timestamp is taken after the searcher has been opened.
     self.updated = time.time()
     self.query_map = {}
示例#5
0
def test_grouping(tempdir, indexer, zipcodes):
    """Exercise nested-field terms, facets, grouping, timeouts and copying.

    ``zipcodes`` is iterated as a sequence of dict-like documents with at
    least 'state', 'county', 'city', 'latitude' and 'longitude' keys;
    ``tempdir`` is used as the parent of an on-disk copy target.
    """
    # Register a nested field named 'state.county.city' under 'location'.
    field = indexer.fields['location'] = engine.NestedField(
        'state.county.city', docValuesType='sorted')
    # Index only documents from four states. Latitude and longitude are popped
    # from the document and re-added as zero-padded strings with 3 decimals;
    # 'location' is the dot-joined state, county and city.
    for doc in zipcodes:
        if doc['state'] in ('CA', 'AK', 'WY', 'PR'):
            lat, lng = ('{0:08.3f}'.format(doc.pop(l))
                        for l in ['latitude', 'longitude'])
            location = '.'.join(doc[name]
                                for name in ['state', 'county', 'city'])
            indexer.add(doc, latitude=lat, longitude=lng, location=location)
    indexer.commit()
    # First and last 'state' terms are expected to be 'AK' and 'WY'.
    states = list(indexer.terms('state'))
    assert states[0] == 'AK' and states[-1] == 'WY'
    # County names are the last dotted component of the 'state.county' terms
    # enumerated with 'CA'; they must match the counties found by prefix search.
    counties = [
        term.split('.')[-1] for term in indexer.terms('state.county', 'CA')
    ]
    hits = indexer.search(field.prefix('CA'))
    assert sorted({hit['county'] for hit in hits}) == counties
    assert counties[0] == 'Alameda' and counties[-1] == 'Yuba'
    # Same cross-check one level deeper, for cities under 'CA.Los Angeles'.
    cities = [
        term.split('.')[-1]
        for term in indexer.terms('state.county.city', 'CA.Los Angeles')
    ]
    hits = indexer.search(field.prefix('CA.Los Angeles'))
    assert sorted({hit['city'] for hit in hits}) == cities
    assert cities[0] == 'Acton' and cities[-1] == 'Woodland Hills'
    # Exactly one hit is expected for this zipcode (single-element unpacking).
    (hit, ) = indexer.search('zipcode:90210')
    assert hit['state'] == 'CA' and hit['county'] == 'Los Angeles' and hit[
        'city'] == 'Beverly Hills' and hit['longitude'] == '-118.406'
    query = Q.prefix('zipcode', '90')
    # NOTE: ``field`` is rebound here from the NestedField object to the
    # string key 'state.county'; later uses of ``field`` refer to that string.
    ((field, facets), ) = indexer.facets(query, 'state.county').items()
    assert field == 'state.county'
    # Keep only facet keys whose value is truthy; exactly two are expected.
    la, orange = sorted(filter(facets.get, facets))
    assert la == 'CA.Los Angeles' and facets[la] > 100
    assert orange == 'CA.Orange' and facets[orange] > 10
    # Facets can also be requested with an explicit mapping of term -> query.
    queries = {
        term: Q.term(field, term)
        for term in indexer.terms(field, 'CA.')
    }
    ((field, facets), ) = indexer.facets(query, **{field: queries}).items()
    assert all(value.startswith('CA.')
               for value in facets) and set(facets) == set(queries)
    assert facets['CA.Los Angeles'] == 264
    # With ``count=1`` one group is returned while ``groups.count`` exceeds 1.
    groups = indexer.groupby(field, Q.term('state', 'CA'), count=1)
    assert len(groups) == 1 < groups.count
    (hits, ) = groups
    assert hits.value == 'CA.Los Angeles' and len(
        hits) == 1 and hits.count > 100
    # Use GroupingSearch directly, sorted on the same field, over all documents.
    grouping = engine.documents.GroupingSearch(field,
                                               sort=search.Sort(
                                                   indexer.sortfield(field)),
                                               cache=False,
                                               allGroups=True)
    assert all(
        grouping.search(indexer.indexSearcher, Q.alldocs()).facets.values())
    assert len(grouping) == len(list(grouping)) > 100
    assert set(grouping) > set(facets)
    # A negative timeout yields an empty result with zero count and NaN
    # maxscore; a generous timeout yields every matching document.
    hits = indexer.search(query, timeout=-1)
    assert not hits and not hits.count and math.isnan(hits.maxscore)
    hits = indexer.search(query, timeout=10)
    assert len(hits) == hits.count == indexer.count(
        query) and hits.maxscore == 1.0
    # Copy only the documents matching the query into a RAM directory.
    directory = store.RAMDirectory()
    query = Q.term('state', 'CA')
    size = indexer.copy(directory, query)
    searcher = engine.IndexSearcher(directory)
    assert len(searcher) == size and list(searcher.terms('state')) == ['CA']
    # Copy the complement (``exclude=query``) to disk; the sizes of the two
    # copies add up to the size of the full index.
    path = os.path.join(tempdir, 'temp')
    size = indexer.copy(path, exclude=query, merge=1)
    assert len(searcher) + size == len(indexer)
    searcher = engine.IndexSearcher(path)
    assert len(searcher.segments) == 1 and 'CA' not in searcher.terms('state')
    directory.close()