def testGenericDrilldownFields(self):
    """A custom isDrilldownFieldFunction decides by name which fields are drilldown fields."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        reg = FieldRegistry(isDrilldownFieldFunction=lambda name: name.startswith('drilldown'))
        # Names matching the predicate are drilldown fields, others are not.
        self.assertTrue(reg.isDrilldownField('drilldown.aap'))
        self.assertTrue(reg.isDrilldownField('drilldown.noot'))
        self.assertFalse(reg.isDrilldownField('noot'))
def testAddWithoutIdentifier(self):
    """addDocument without an identifier posts the serialized fields to the update path."""
    reg = FieldRegistry()
    consume(self._lucene.addDocument(fields=[reg.createField("id", "id1")]))
    # Exactly one HTTP post, to the update endpoint, carrying the JSON field list.
    self.assertEqual(1, len(self.post))
    self.assertEqual('/lucene/update/?', self.post[0]['path'])
    self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', self.post[0]['data'])
def testDefault(self):
    """The default '__id__' field is stored, indexed and untokenized."""
    reg = FieldRegistry()
    idField = reg.createField('__id__', 'id:1')
    idType = idField.fieldType()
    self.assertFalse(idType.tokenized())
    self.assertTrue(idType.stored())
    self.assertTrue(idType.indexed())
    self.assertTrue(reg.isUntokenized('__id__'))
def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
    """A field without term frequencies must be left out of an unqualified phrase query."""
    fieldRegistry = FieldRegistry()
    fieldRegistry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELDTYPE)
    self.composer = LuceneQueryComposer(
        unqualifiedTermFields=[("unqualified", 1.0), ('noTermFreqField', 2.0)],
        luceneSettings=LuceneSettings(fieldRegistry=fieldRegistry))
    # Only the regular unqualified field appears in the resulting phrase query.
    expected = PhraseQuery()
    expected.add(Term("unqualified", "phrase query"))
    self.assertConversion(expected, '"phrase query"')
def testDefault(self):
    """createField for '__id__' yields a stored StringField dict."""
    reg = FieldRegistry()
    self.assertEquals(
        {"type": "StringField", "name": "__id__", "value": "id:1", "stored": True},
        reg.createField('__id__', 'id:1'))
def testNumericField(self):
    """A field registered as NUMERICFIELD is created as a NumericField dict."""
    reg = FieldRegistry()
    reg.register('fieldname', NUMERICFIELD)
    self.assertEquals(
        {"type": "NumericField", "name": "fieldname", "value": 2010},
        reg.createField('fieldname', 2010))
def testIsUntokenized(self):
    """Drilldown fields, 'untokenized.' names and STRINGFIELDs are untokenized; TEXTFIELDs are not."""
    reg = FieldRegistry(drilldownFields=[DrilldownField('aDrilldownField')])
    self.assertTrue(reg.isUntokenized('aDrilldownField'))
    self.assertTrue(reg.isUntokenized('untokenized.some.field'))
    self.assertFalse(reg.isUntokenized('other.field'))
    # Re-registering the same name switches its tokenization.
    reg.register('fieldname', STRINGFIELD)
    self.assertTrue(reg.isUntokenized('fieldname'))
    reg.register('fieldname', TEXTFIELD)
    self.assertFalse(reg.isUntokenized('fieldname'))
def testIsNumeric(self):
    """Numeric-ness follows explicit registration and the range./__key__. name conventions."""
    reg = FieldRegistry()
    reg.register("longfield", fieldDefinition=LONGFIELD)
    reg.register("intfield", fieldDefinition=INTFIELD)
    self.assertFalse(reg.isNumeric('field1'))
    for name in ('longfield', 'intfield', 'range.double.afield', '__key__.field1'):
        self.assertTrue(reg.isNumeric(name))
def testNoTermsFreqField(self):
    """A field registered as NO_TERMS_FREQUENCY_FIELD serializes to a NoTermsFrequencyField dict."""
    reg = FieldRegistry()
    reg.register('fieldname', NO_TERMS_FREQUENCY_FIELD)
    self.assertEquals(
        {"type": "NoTermsFrequencyField", "name": "fieldname", "value": "value"},
        reg.createField('fieldname', 'value'))
def testPostDictWithDrilldownFields(self):
    """Registered drilldown fields appear in the settings' post dict next to the defaults."""
    registry = FieldRegistry()
    registry.registerDrilldownField("field0", hierarchical=True, multiValued=False)
    registry.registerDrilldownField("field1", hierarchical=True, multiValued=True, indexFieldName="$facets_2")
    expected = copy(DEFAULTS)
    expected['drilldownFields'] = [
        {'dim': 'field0', 'hierarchical': True, 'fieldname': None, 'multiValued': False},
        {'dim': 'field1', 'hierarchical': True, 'fieldname': '$facets_2', 'multiValued': True},
    ]
    self.assertEquals(expected, LuceneSettings(fieldRegistry=registry).asPostDict())
def testIsUntokenized(self):
    """'untokenized.' prefix and StringField-typed registrations are untokenized."""
    reg = FieldRegistry()
    self.assertTrue(reg.isUntokenized('untokenized.some.field'))
    # Re-registering flips tokenization along with the field type.
    reg.register('fieldname', StringField.TYPE_NOT_STORED)
    self.assertTrue(reg.isUntokenized('fieldname'))
    reg.register('fieldname', TextField.TYPE_NOT_STORED)
    self.assertFalse(reg.isUntokenized('fieldname'))
def testReuseCreatedField(self):
    """mayReUse=True hands back the same field object with a new value; False makes a fresh one."""
    reg = FieldRegistry()
    original = reg.createField('fieldname', 'value')
    self.assertEquals("value", original.stringValue())
    reused = reg.createField('fieldname', 'newvalue', mayReUse=True)
    self.assertEquals("newvalue", reused.stringValue())
    self.assertEquals(original, reused)
    fresh = reg.createField('fieldname', 'newvalue', mayReUse=False)
    self.assertEquals("newvalue", fresh.stringValue())
    self.assertNotEqual(reused, fresh)
def testDrilldownFieldQuery(self):
    """'=' on a hierarchical drilldown field converts to a DrillDown term query."""
    self.fieldRegistry = FieldRegistry([DrilldownField('field', hierarchical=True)])
    singleLevel = dict(type="TermQuery", term=dict(field="field", path=["value"], type="DrillDown"))
    self.assertEquals(singleLevel, self._convert("field = value"))
    # '>' inside the value separates hierarchy levels in the path.
    twoLevels = dict(type="TermQuery", term=dict(field="field", path=["value", "value1"], type="DrillDown"))
    self.assertEquals(twoLevels, self._convert('field = "value>value1"'))
def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
    """No-term-frequency fields are dropped from unqualified phrase queries (dict variant)."""
    self.fieldRegistry = FieldRegistry()
    self.fieldRegistry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELD)
    self.unqualifiedFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
    self.assertEquals(
        dict(
            type="PhraseQuery",
            terms=[
                dict(field="unqualified", value="phrase"),
                dict(field="unqualified", value="query"),
            ],
            boost=1.0),
        self._convert('"phrase query"'))
def testSpecificField(self): registry = FieldRegistry() field = registry.createField('fieldname', 'value') self.assertEquals({ "type": "TextField", "name": "fieldname", "value": "value", }, field) registry.register('fieldname', STRINGFIELD_STORED) field = registry.createField('fieldname', 'value') self.assertEquals({ "type": "StringField", "name": "fieldname", "value": "value", "stored": True }, field)
def __init__(self,
        commitTimeout=10,
        commitCount=100000,
        multithreaded=True,
        readonly=False,
        lruTaxonomyWriterCacheSize=4000,
        analyzer=None,
        similarity=None,
        fieldRegistry=None,
        maxMergeAtOnce=2,
        segmentsPerTier=8.0,
        numberOfConcurrentTasks=6,
        verbose=True,
    ):
    """Collect Lucene index/writer configuration.

    Fix: analyzer, similarity and fieldRegistry previously used instance defaults
    (e.g. ``fieldRegistry=FieldRegistry()``) evaluated once at function-definition
    time, so every settings object created without those arguments shared the
    same mutable objects. A None sentinel now yields a fresh default per call;
    explicit arguments behave exactly as before.
    """
    self.commitTimeout = commitTimeout
    self.commitCount = commitCount
    self.multithreaded = multithreaded
    self.readonly = readonly
    self.lruTaxonomyWriterCacheSize = lruTaxonomyWriterCacheSize
    self.analyzer = MerescoStandardAnalyzer() if analyzer is None else analyzer
    self.similarity = BM25Similarity() if similarity is None else similarity
    self.fieldRegistry = FieldRegistry() if fieldRegistry is None else fieldRegistry
    self.maxMergeAtOnce = maxMergeAtOnce
    self.segmentsPerTier = segmentsPerTier
    self.numberOfConcurrentTasks = numberOfConcurrentTasks
    self.verbose = verbose
def testRangeQueryAndType(self):
    """rangeQueryAndType maps registrations and 'range.double.' names to (queryName, pythonType)."""
    reg = FieldRegistry()
    reg.register("longfield", fieldDefinition=LONGFIELD)
    reg.register("intfield", fieldDefinition=INTFIELD)
    # (fieldname, expected query name, expected python type); 'long' is Python 2.
    for fieldname, queryName, pyType in [
            ('longfield', "Long", long),
            ('intfield', "Int", int),
            ('range.double.field', "Double", float),
            ('anyfield', "String", str)]:
        q, t = reg.rangeQueryAndType(fieldname)
        self.assertEqual(queryName, q)
        self.assertEqual(pyType, t)
def testDrilldownFieldQuery(self):
    """'=' on a hierarchical drilldown field yields a DrillDown term query (duplicate suite)."""
    self.fieldRegistry = FieldRegistry([DrilldownField('field', hierarchical=True)])
    oneLevel = dict(type="TermQuery", term=dict(field="field", path=["value"], type="DrillDown"))
    self.assertEquals(oneLevel, self._convert("field = value"))
    nested = dict(type="TermQuery", term=dict(field="field", path=["value", "value1"], type="DrillDown"))
    self.assertEquals(nested, self._convert('field = "value>value1"'))
def testTermVectorsForField(self):
    """Fields named at construction carry termVectors in their serialization; others don't."""
    reg = FieldRegistry(termVectorFields=['field1', 'field2'])
    for name in ('field1', 'field2'):
        self.assertTrue(reg.isTermVectorField(name))
        self.assertEquals(
            {"type": "TextField", "name": name, "value": "id:1", "termVectors": True},
            reg.createField(name, 'id:1'))
    self.assertFalse(reg.isTermVectorField('field3'))
    self.assertEquals(
        {"type": "TextField", "name": "field3", "value": "id:1"},
        reg.createField('field3', 'id:1'))
def testPostDictWithDrilldownFields(self):
    """asPostDict includes registered drilldown fields alongside all default settings."""
    registry = FieldRegistry()
    registry.registerDrilldownField("field0", hierarchical=True, multiValued=False)
    registry.registerDrilldownField("field1", hierarchical=True, multiValued=True, indexFieldName="$facets_2")
    expected = {
        'lruTaxonomyWriterCacheSize': 4000,
        'maxMergeAtOnce': 2,
        'similarity': {'type': 'BM25Similarity'},
        'numberOfConcurrentTasks': 6,
        'segmentsPerTier': 8.0,
        'analyzer': {'type': 'MerescoStandardAnalyzer'},
        'drilldownFields': [
            {'dim': 'field0', 'hierarchical': True, 'fieldname': None, 'multiValued': False},
            {'dim': 'field1', 'hierarchical': True, 'fieldname': '$facets_2', 'multiValued': True},
        ],
        'commitCount': 100000,
        'commitTimeout': 10,
    }
    self.assertEqual(expected, LuceneSettings(fieldRegistry=registry).asPostDict())
def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
    """Unqualified phrase queries skip fields registered without term frequencies."""
    self.fieldRegistry = FieldRegistry()
    self.fieldRegistry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELD)
    self.unqualifiedFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
    expected = dict(
        type="PhraseQuery",
        terms=[
            dict(field="unqualified", value="phrase"),
            dict(field="unqualified", value="query"),
        ],
        boost=1.0)
    self.assertEquals(expected, self._convert('"phrase query"'))
def luceneAndReaderConfig(defaultLuceneSettings, httpRequestAdapter, lucenePort):
    """Build the default-core Lucene client wired onto the given HTTP request adapter."""
    # 'drilldownFields' comes from enclosing/module scope.
    registry = FieldRegistry(drilldownFields=drilldownFields)
    settings = defaultLuceneSettings.clone(fieldRegistry=registry)
    return be(
        (Lucene(host='127.0.0.1', port=lucenePort, name=DEFAULT_CORE, settings=settings),
            (httpRequestAdapter,)
        ))
def testAddDocument(self):
    """commit() forwards the document via addDocument under the transaction's identifier."""
    converter = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
    observer = CallTrace()
    converter.addObserver(observer)
    converter.ctx.tx = Transaction('tsname')
    converter.ctx.tx.locals['id'] = 'identifier'
    converter.addField('field', 'value')
    consume(converter.commit('unused'))
    self.assertEquals(['addDocument'], observer.calledMethodNames())
    self.assertEquals('identifier', observer.calledMethods[0].kwargs['identifier'])
def setUp(self, fieldRegistry=None):
    """Prepare a Lucene instance with a mocked reactor and a tracing observer.

    Fix: the default was ``fieldRegistry=FieldRegistry()`` — a mutable default
    evaluated once at definition time, so every test run without an explicit
    registry shared (and could pollute) the same FieldRegistry. A None sentinel
    now produces a fresh registry per call; explicit arguments are unchanged.
    """
    super(LuceneTestCase, self).setUp()
    fieldRegistry = FieldRegistry() if fieldRegistry is None else fieldRegistry
    self._javaObjects = self._getJavaObjects()
    # Reactor stub: addTimer returns a traceable dummy timer.
    self._reactor = CallTrace('reactor', methods={'addTimer': lambda seconds, callback: CallTrace('timer')})
    self._defaultSettings = LuceneSettings(commitCount=1, commitTimeout=1, fieldRegistry=fieldRegistry)
    self.lucene = Lucene(
        join(self.tempdir, 'lucene'),
        reactor=self._reactor,
        settings=self._defaultSettings,
    )
    self.observer = CallTrace()
    self.lucene.addObserver(self.observer)
def testSpecificField(self):
    """Fields are unstored by default; registering TYPE_STORED makes them stored."""
    reg = FieldRegistry()
    self.assertFalse(reg.createField('fieldname', 'value').fieldType().stored())
    reg.register('fieldname', StringField.TYPE_STORED)
    self.assertTrue(reg.createField('fieldname', 'value').fieldType().stored())
def _prepareLuceneSettings(self):
    """Build LuceneSettings, preferring analyzer/registry set on the test instance."""
    settings = LuceneSettings()
    if hasattr(self, '_analyzer'):
        settings.analyzer = self._analyzer
    if hasattr(self, 'fieldRegistry'):
        settings.fieldRegistry = self.fieldRegistry
    else:
        # No registry supplied by the test: use a default one with numeric fields.
        settings.fieldRegistry = FieldRegistry()
        settings.fieldRegistry.register("intField", fieldDefinition=INTFIELD)
        settings.fieldRegistry.register("longField", fieldDefinition=LONGFIELD)
    return settings
def testCreateDocument(self):
    """_createDocument maps field-name prefixes onto the proper Lucene field types."""
    fields = {
        'field1': ['value1'],
        'field2': ['value2', 'value2.1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        '__key__.field5': ["12345"],
        '__numeric__.field6': ["12345"],
    }
    converter = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
    # __key__. values are numerated through the observer.
    observer = CallTrace(returnValues={'numerateTerm': 1})
    converter.addObserver(observer)
    document = converter._createDocument(fields)
    self.assertEquals(
        set(fields.keys()),
        set(f.name() for f in document.getFields()))
    # Plain text field: indexed, unstored, tokenized.
    field1 = document.getField("field1")
    self.assertEquals('value1', field1.stringValue())
    self.assertTrue(field1.fieldType().indexed())
    self.assertFalse(field1.fieldType().stored())
    self.assertTrue(field1.fieldType().tokenized())
    self.assertEquals(['value2', 'value2.1'], document.getValues('field2'))
    # sorted./untokenized. prefixes: indexed, unstored, NOT tokenized.
    for name, value in (("sorted.field3", 'value3'), ("untokenized.field4", 'value4')):
        field = document.getField(name)
        self.assertEquals(value, field.stringValue())
        self.assertTrue(field.fieldType().indexed())
        self.assertFalse(field.fieldType().stored())
        self.assertFalse(field.fieldType().tokenized())
    # __key__. value replaced by the numerated term (1); numeric, not indexed.
    field5 = document.getField("__key__.field5")
    self.assertEquals(1, field5.numericValue().longValue())
    self.assertFalse(field5.fieldType().indexed())
    self.assertFalse(field5.fieldType().stored())
    self.assertTrue(field5.fieldType().tokenized())
    # __numeric__. value parsed to a long; numeric, not indexed.
    field6 = document.getField("__numeric__.field6")
    self.assertEquals(12345, field6.numericValue().longValue())
    self.assertFalse(field6.fieldType().indexed())
    self.assertFalse(field6.fieldType().stored())
    self.assertTrue(field6.fieldType().tokenized())
def testCreateDocument(self):
    """_createFields serializes each name/value pair to a typed field dict by prefix."""
    inputFields = {
        'field1': ['value1'],
        'field2': ['value2', 'value2.1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        '__key__.field5': [12345],
        '__numeric__.field6': [12345],
    }
    converter = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
    createdFields = converter._createFields(inputFields)
    self.assertEqual([
        {"name": "field2", "type": "TextField", "value": "value2"},
        {"name": "field2", "type": "TextField", "value": "value2.1"},
        {"name": "__key__.field5", "type": "KeyField", "value": 12345},
        {"name": "field1", "type": "TextField", "value": "value1"},
        {"name": "sorted.field3", "type": "StringField", "value": "value3", "sort": True},
        {"name": "__numeric__.field6", "type": "NumericField", "value": 12345},
        {"name": "untokenized.field4", "type": "StringField", "value": "value4"},
    ], createdFields)
def testCreateFacet(self):
    """Registered drilldown fields become FacetFields; unregistered ones stay search fields."""
    fields = {
        'field1': ['value1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        'untokenized.field5': ['value5', 'value6'],
        'untokenized.field6': ['value5/value6'],
        'untokenized.field7': ['valuex'],
        'untokenized.field8': [['grandparent', 'parent', 'child'], ['parent2', 'child']],
    }
    converter = Fields2LuceneDoc(
        'tsname',
        fieldRegistry=FieldRegistry(drilldownFields=[
            DrilldownField('untokenized.field4'),
            DrilldownField('untokenized.field5'),
            DrilldownField('untokenized.field6'),
            DrilldownField('untokenized.field8', hierarchical=True),
        ]))
    observer = CallTrace()
    converter.addObserver(observer)
    converter.ctx.tx = Transaction('tsname')
    converter.ctx.tx.locals['id'] = 'identifier'
    for fieldname, values in fields.items():
        for value in values:
            converter.addField(fieldname, value)
    consume(converter.commit('unused'))
    document = observer.calledMethods[0].kwargs['document']
    searchFields = [f for f in document.getFields() if not FacetField.instance_(f)]
    self.assertEquals(
        ['field1', 'sorted.field3', 'untokenized.field7'],
        [f.name() for f in searchFields])
    facetsFields = [FacetField.cast_(f) for f in document.getFields() if FacetField.instance_(f)]
    self.assertEquals(6, len(facetsFields))
    self.assertEquals([
        ('untokenized.field8', ['grandparent', 'parent', 'child']),
        ('untokenized.field8', ['parent2', 'child']),
        ('untokenized.field6', ['value5/value6']),
        ('untokenized.field4', ['value4']),
        ('untokenized.field5', ['value5']),
        ('untokenized.field5', ['value6']),
    ], [(f.dim, list(f.path)) for f in facetsFields])  # Note: a FacetField doesn't have a name
def testOnlyOneSortValueAllowed(self):
    """For a 'sorted.' field only the first added value survives the commit."""
    converter = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
    observer = CallTrace()
    converter.addObserver(observer)
    converter.ctx.tx = Transaction('tsname')
    converter.ctx.tx.locals['id'] = 'identifier'
    converter.addField('sorted.field', 'value1')
    converter.addField('sorted.field', 'value2')  # second value is discarded
    consume(converter.commit('unused'))
    committedFields = observer.calledMethods[0].kwargs['fields']
    self.assertEquals(1, len(committedFields))
    self.assertEqual(
        {'sort': True, 'type': 'StringField', 'name': 'sorted.field', 'value': 'value1'},
        committedFields[0])
def setUp(self):
    """Wire a MultiLucene observing a Lucene core and intercept its HTTP posts."""
    SeecrTestCase.setUp(self)
    self.registry = FieldRegistry()
    self._multiLucene = MultiLucene(defaultCore='coreA', host="localhost", port=12345)
    self._lucene = Lucene(host="localhost", port=12345, settings=LuceneSettings(), name='coreA')
    self._multiLucene.addObserver(self._lucene)
    self.post = []
    self.response = ""

    # Generator stub: records the request, then delivers self.response as the
    # generator's return value via StopIteration (pre-PEP 479 Python 2 idiom —
    # NOTE(review): would raise RuntimeError on Python 3.7+).
    def mockPost(data, path, **kwargs):
        self.post.append(dict(data=data, path=path))
        raise StopIteration(self.response)
        yield

    connect = self._multiLucene._connect()
    connect._post = mockPost
    self._multiLucene._connect = lambda: connect
def testAddFacetField(self):
    """addFacetField produces exactly one facet entry (dicts carrying a 'path')."""
    converter = Fields2LuceneDoc(
        'tsname',
        fieldRegistry=FieldRegistry(drilldownFields=[DrilldownField('untokenized.field')]))
    observer = CallTrace()
    converter.addObserver(observer)
    converter.ctx.tx = Transaction('tsname')
    converter.ctx.tx.locals['id'] = 'identifier'
    converter.addField('field', 'value')
    converter.addFacetField('untokenized.field', 'untokenized value')
    consume(converter.commit('unused'))
    committedFields = observer.calledMethods[0].kwargs['fields']
    self.assertEquals(1, len([f for f in committedFields if "path" in f]))
def testDefaultDefinition(self):
    """defaultDefinition overrides the out-of-the-box TextField fallback."""
    reg = FieldRegistry()
    self.assertEquals(
        {"type": "TextField", "name": "aField", "value": "id:1"},
        reg.createField('aField', 'id:1'))
    self.assertFalse(reg.isUntokenized('aField'))
    # With STRINGFIELD as default, unknown names become untokenized StringFields.
    reg = FieldRegistry(defaultDefinition=STRINGFIELD)
    self.assertEquals(
        {"type": "StringField", "name": "aField", "value": "id:1"},
        reg.createField('aField', 'id:1'))
    self.assertTrue(reg.isUntokenized('aField'))
def testDrilldownFields(self):
    """Drilldown registration (constructor and after-the-fact) drives the facetsConfig."""
    reg = FieldRegistry(drilldownFields=[
        DrilldownField(name='aap'),
        DrilldownField(name='noot', hierarchical=True),
    ])
    reg.registerDrilldownField(fieldname='mies', multiValued=False)
    for name in ('aap', 'noot', 'mies'):
        self.assertTrue(reg.isDrilldownField(name))
    self.assertFalse(reg.isDrilldownField('vuur'))
    self.assertFalse(reg.isHierarchicalDrilldown('aap'))
    self.assertTrue(reg.isHierarchicalDrilldown('noot'))
    # The Java-side facets configuration mirrors the registrations.
    dimConfigs = reg.facetsConfig.getDimConfigs()
    self.assertEquals(set(['aap', 'noot', 'mies']), set(dimConfigs.keySet()))
    self.assertFalse(dimConfigs.get('aap').hierarchical)
    self.assertTrue(dimConfigs.get('noot').hierarchical)
    self.assertTrue(dimConfigs.get('noot').multiValued)
    self.assertFalse(dimConfigs.get('mies').multiValued)
def __init__(self,
        commitTimeout=10,
        commitCount=100000,
        lruTaxonomyWriterCacheSize=4000,
        analyzer=None,
        similarity=None,
        mergePolicy=None,
        fieldRegistry=None,
        numberOfConcurrentTasks=6,
        cacheFacetOrdinals=True,
        verbose=True,
    ):
    """Collect settings and stash each SETTING_NAMES entry as '_<name>' on self.

    Fix: analyzer/similarity/mergePolicy dicts and the FieldRegistry were
    mutable default arguments, evaluated once and shared by every instance
    created without them (mutating one instance's analyzer dict changed all).
    None sentinels are resolved to fresh defaults BEFORE locals() is captured,
    so the values stored under SETTING_NAMES are identical to before.
    """
    if analyzer is None:
        analyzer = dict(type="MerescoStandardAnalyzer")
    if similarity is None:
        similarity = dict(type="BM25Similarity")
    if mergePolicy is None:
        mergePolicy = dict(type="TieredMergePolicy", maxMergeAtOnce=2, segmentsPerTier=8.0)
    if fieldRegistry is None:
        fieldRegistry = FieldRegistry()
    local = locals()
    for name in SETTING_NAMES:
        self.__dict__['_' + name] = local[name]
    self.fieldRegistry = fieldRegistry
def testCreateFacet(self):
    """Drilldown-registered fields serialize with a 'path'; others remain search fields."""
    inputFields = {
        'field1': ['value1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        'untokenized.field5': ['value5', 'value6'],
        'untokenized.field6': ['value5/value6'],
        'untokenized.field7': ['valuex'],
        'untokenized.field8': [['grandparent', 'parent', 'child'], ['parent2', 'child']],
    }
    converter = Fields2LuceneDoc(
        'tsname',
        fieldRegistry=FieldRegistry(drilldownFields=[
            DrilldownField('untokenized.field4'),
            DrilldownField('untokenized.field5'),
            DrilldownField('untokenized.field6'),
            DrilldownField('untokenized.field8', hierarchical=True),
        ]))
    observer = CallTrace()
    converter.addObserver(observer)
    converter.ctx.tx = Transaction('tsname')
    converter.ctx.tx.locals['id'] = 'identifier'
    for fieldname, values in inputFields.items():
        for value in values:
            converter.addField(fieldname, value)
    consume(converter.commit('unused'))
    committedFields = observer.calledMethods[0].kwargs['fields']
    searchFields = [f for f in committedFields if "path" not in f]
    self.assertEquals(
        ['field1', 'sorted.field3', 'untokenized.field7'],
        [f['name'] for f in searchFields])
    facetsFields = [f for f in committedFields if "path" in f]
    self.assertEquals(6, len(facetsFields))
    self.assertEquals([
        ('untokenized.field8', ['grandparent', 'parent', 'child']),
        ('untokenized.field8', ['parent2', 'child']),
        ('untokenized.field6', ['value5/value6']),
        ('untokenized.field4', ['value4']),
        ('untokenized.field5', ['value5']),
        ('untokenized.field5', ['value6']),
    ], [(f['name'], f['path']) for f in facetsFields])
def testAddFacetField(self):
    """addFacetField results in exactly one FacetField on the committed document."""
    converter = Fields2LuceneDoc(
        'tsname',
        fieldRegistry=FieldRegistry(drilldownFields=[DrilldownField('untokenized.field')]))
    observer = CallTrace()
    converter.addObserver(observer)
    converter.ctx.tx = Transaction('tsname')
    converter.ctx.tx.locals['id'] = 'identifier'
    converter.addField('field', 'value')
    converter.addFacetField('untokenized.field', 'untokenized value')
    consume(converter.commit('unused'))
    document = observer.calledMethods[0].kwargs['document']
    facetsFields = [FacetField.cast_(f) for f in document.getFields() if FacetField.instance_(f)]
    self.assertEquals(1, len(facetsFields))
def testDrilldownFields(self):
    """Drilldown flags (hierarchical/multiValued/untokenized) and facet-field creation."""
    reg = FieldRegistry(drilldownFields=[
        DrilldownField(name='aap'),
        DrilldownField(name='noot', hierarchical=True),
    ])
    reg.registerDrilldownField(fieldname='mies', multiValued=False)
    for name in ('aap', 'noot', 'mies'):
        self.assertTrue(reg.isDrilldownField(name))
    self.assertFalse(reg.isDrilldownField('vuur'))
    self.assertFalse(reg.isHierarchicalDrilldown('aap'))
    self.assertTrue(reg.isHierarchicalDrilldown('noot'))
    # multiValued defaults to True; 'mies' was registered with False.
    self.assertTrue(reg.isMultivaluedDrilldown('aap'))
    self.assertTrue(reg.isMultivaluedDrilldown('noot'))
    self.assertFalse(reg.isMultivaluedDrilldown('mies'))
    self.assertTrue(reg.isUntokenized('mies'))
    self.assertEqual(
        {"type": "FacetField", "name": "name", "path": ["value"]},
        reg.createFacetField("name", ["value"]))
def testWildcardQuery(self):
    """A trailing '*' converts to a WildcardQuery dict."""
    self.fieldRegistry = FieldRegistry()
    query = dict(type="WildcardQuery", term=dict(field="field", value="???*"))
    self.assertConversion(query, cql='field=???*')
def main(reactor, port, serverPort, autocompletePort, databasePath, **kwargs):
    """Compose the integration-test server: three Lucene cores, storage, SRU/remote/autocomplete endpoints."""
    drilldownFields = [
        DrilldownField('untokenized.field2'),
        DrilldownField('untokenized.field2.copy', indexFieldName='copy'),
        DrilldownField('untokenized.fieldHier', hierarchical=True),
    ]
    fieldRegistry = FieldRegistry(drilldownFields)
    for fieldname in ('intfield1', 'intfield2', 'intfield3', 'intfield_missing', 'sorted.intfield_missing'):
        fieldRegistry.register(fieldname, INTFIELD)

    luceneSettings = LuceneSettings(
        fieldRegistry=fieldRegistry,
        commitCount=30,
        commitTimeout=0.3,
        #analyzer=MerescoDutchStemmingAnalyzer(["field4", "field5"]),
        analyzer=dict(type="MerescoDutchStemmingAnalyzer", stemmingFields=['field4', 'field5']),
    )
    http11_request = be(
        (HttpRequest1_1(),
            (SocketPool(reactor=reactor, unusedTimeout=5, limits=dict(totalSize=100, destinationSize=10)),)
        ))
    lucene = be(
        (Lucene(host="localhost", port=serverPort, name='main', settings=luceneSettings),
            (http11_request,)
        ))
    lucene2Settings = LuceneSettings(fieldRegistry=fieldRegistry, commitTimeout=0.1)
    lucene2 = be(
        (Lucene(host="localhost", port=serverPort, name='main2', settings=lucene2Settings),
            (http11_request,)
        ))
    emptyLuceneSettings = LuceneSettings(commitTimeout=1)
    multiLuceneHelix = (MultiLucene(host='localhost', port=serverPort, defaultCore='main'),
        (Lucene(host='localhost', port=serverPort, name='empty-core', settings=emptyLuceneSettings),
            (http11_request,)
        ),
        (lucene,),
        (lucene2,),
        (http11_request,)
    )
    storageComponent = be(
        (RetrieveDataToGetData(),
            (StorageComponentAdapter(),
                (MultiSequentialStorage(directory=join(databasePath, 'storage')),)
            )
        ))

    def newQueryAdapter():
        # Fresh adapter per endpoint, each with its own per-core converters.
        return AdapterToLuceneQuery(
            defaultCore='main',
            coreConverters={
                "main": QueryExpressionToLuceneQueryDict([], luceneSettings=luceneSettings),
                "main2": QueryExpressionToLuceneQueryDict([], luceneSettings=lucene2Settings),
                "empty-core": QueryExpressionToLuceneQueryDict([], luceneSettings=emptyLuceneSettings),
            })

    return \
        (Observable(),
            (ObservableHttpServer(reactor=reactor, port=port),
                (BasicHttpHandler(),
                    (ApacheLogger(outputStream=stdout),
                        (PathFilter("/info", excluding=[
                                '/info/version',
                                '/info/name',
                                '/update',
                                '/sru',
                                '/remote',
                                '/via-remote-sru',
                            ]),
                            (DynamicHtml(
                                    [dynamicPath],
                                    reactor=reactor,
                                    indexPage='/info',
                                    additionalGlobals={
                                        'VERSION': version,
                                    }
                                ),
                            )
                        ),
                        (PathFilter("/info/version"),
                            (StringServer(version, ContentTypePlainText),)
                        ),
                        (PathFilter("/info/name"),
                            (StringServer('Meresco Lucene', ContentTypePlainText),)
                        ),
                        (PathFilter("/static"),
                            (PathRename(lambda path: path[len('/static'):]),
                                (FileServer(staticPath),)
                            )
                        ),
                        (PathFilter("/update_main", excluding=['/update_main2']),
                            uploadHelix(lucene, storageComponent, drilldownFields, fieldRegistry=luceneSettings.fieldRegistry),
                        ),
                        (PathFilter("/update_main2"),
                            uploadHelix(lucene2, storageComponent, drilldownFields, fieldRegistry=lucene2Settings.fieldRegistry),
                        ),
                        (PathFilter('/sru'),
                            (SruParser(defaultRecordSchema='record'),
                                (SruHandler(),
                                    (newQueryAdapter(),
                                        multiLuceneHelix,
                                    ),
                                    (SRUTermDrilldown(defaultFormat='xml'),),
                                    (SruDuplicateCount(),),
                                    (storageComponent,),
                                )
                            )
                        ),
                        (PathFilter('/via-remote-sru'),
                            (SruParser(defaultRecordSchema='record'),
                                (SruHandler(),
                                    (LuceneRemote(host='localhost', port=port, path='/remote'),),
                                    (SRUTermDrilldown(defaultFormat='xml'),),
                                    (SruDuplicateCount(),),
                                    (storageComponent,),
                                )
                            )
                        ),
                        (PathFilter('/remote'),
                            (LuceneRemoteService(reactor=reactor),
                                (newQueryAdapter(),
                                    multiLuceneHelix,
                                )
                            )
                        ),
                        (PathFilter('/autocomplete'),
                            (Autocomplete(host='localhost', port=port, path='/autocomplete', defaultField='__all__', templateQuery='?', defaultLimit=5, shortname='?', description='?'),
                                (lucene,),
                            )
                        ),
                        (PathFilter('/suggestion'),
                            (SuggestionIndexComponent(host='localhost', port=autocompletePort),
                                (http11_request,),
                            )
                        )
                    )
                )
            )
        )
class QueryExpressionToLuceneQueryDictTest(SeecrTestCase):
    """Tests converting QueryExpression trees into Lucene query dicts."""

    def testTermQuery(self):
        expr = QueryExpression.searchterm("field", "=", "value")
        self.assertConversion(
            {"type": "TermQuery", "term": {"field": "field", "value": "value"}}, expr)
        # Key order in the expectation is irrelevant.
        self.assertConversion(
            {"term": {"field": "field", "value": "value"}, "type": "TermQuery"}, expr)

    def testRightHandSideIsLowercase(self):
        self.assertConversion(
            {'boost': 1.0, 'term': {'field': 'unqualified', 'value': 'cat'}, 'type': 'TermQuery'},
            QueryExpression.searchterm(term="CaT"))

    def testOneTermOutputWithANumber(self):
        self.assertConversion(
            {'boost': 1.0, 'term': {'field': 'unqualified', 'value': '2005'}, 'type': 'TermQuery'},
            QueryExpression.searchterm(term="2005"))

    def testMatchAllQuery(self):
        self.assertConversion({"type": "MatchAllDocsQuery"}, QueryExpression.searchterm(term="*"))

    def testUnqualifiedTermFields(self):
        self.unqualifiedFields = [('aField', 1.0)]
        self.assertConversion(
            {"type": "TermQuery", "term": {"field": "aField", "value": "value"}, 'boost': 1.0},
            QueryExpression.searchterm(term="value"))

    def testMultipleUnqualifiedTermFields(self):
        self.unqualifiedFields = [('aField', 1.0), ('oField', 2.0)]
        self.assertConversion({
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "aField", "value": "value"}, "boost": 1.0, "occur": "SHOULD"},
                {"type": "TermQuery", "term": {"field": "oField", "value": "value"}, "boost": 2.0, "occur": "SHOULD"},
            ]}, QueryExpression.searchterm(term="value"))

    def testBooleanAndQuery(self):
        expr = QueryExpression.nested(operator='AND')
        expr.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            QueryExpression.searchterm("field2", "=", "value2"),
        ]
        self.assertConversion({
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "MUST"},
                {"type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "MUST"},
            ]}, expr)

    def testBooleanOrQuery(self):
        expr = QueryExpression.nested(operator='OR')
        expr.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            QueryExpression.searchterm("field2", "=", "value2"),
        ]
        self.assertConversion({
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "SHOULD"},
                {"type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "SHOULD"},
            ]}, expr)

    def testBooleanNotQuery(self):
        expr = QueryExpression.nested(operator='AND')
        expr.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            QueryExpression.searchterm("field2", "=", "value2"),
        ]
        expr.operands[1].must_not = True
        self.assertConversion({
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "MUST"},
                {"type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "MUST_NOT"},
            ]}, expr)

    def testBooleanNotQueryNested(self):
        # A negated nested expression becomes a MUST_NOT BooleanQuery clause.
        nestedNotExpr = QueryExpression.nested(operator='AND')
        nestedNotExpr.must_not = True
        nestedNotExpr.operands = [
            QueryExpression.searchterm("field2", "=", "value2"),
            QueryExpression.searchterm("field3", "=", "value3"),
        ]
        expr = QueryExpression.nested(operator='AND')
        expr.operands = [QueryExpression.searchterm("field1", "=", "value1"), nestedNotExpr]
        self.assertConversion({
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "MUST"},
                {
                    "type": "BooleanQuery",
                    "occur": "MUST_NOT",
                    "clauses": [
                        {"type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "MUST"},
                        {"type": "TermQuery", "term": {"field": "field3", "value": "value3"}, "occur": "MUST"},
                    ]
                },
            ]}, expr)

    def testNotExpression(self):
        # A bare NOT is expressed as MatchAll MUST + the term MUST_NOT.
        expr = QueryExpression.searchterm("field", "=", "value")
        expr.must_not = True
        self.assertConversion({
            "type": "BooleanQuery",
            "clauses": [
                {"type": "MatchAllDocsQuery", "occur": "MUST"},
                {"type": "TermQuery", "term": {"field": "field", "value": "value"}, "occur": "MUST_NOT"},
            ]}, expr)

    def testPhraseOutput(self):
        self.assertConversion({
            "type": "PhraseQuery",
            "boost": 1.0,
            "terms": [
                {"field": "unqualified", "value": "cats"},
                {"field": "unqualified", "value": "dogs"},
            ]}, QueryExpression.searchterm(term='"cats dogs"'))

    # def testWhitespaceAnalyzer(self):
    #     self._analyzer = WhitespaceAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "kat"))
    #     query.add(Term("unqualified", "hond"))
    #     self.assertConversion(query, cql='"kat hond"')

    # def testPhraseOutputDoesNoDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "katten"))
    #     query.add(Term("unqualified", "honden"))
    #     self.assertConversion(query, cql='"katten honden"')

    # def testDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertConversion(query, cql='honden')

    # def testDutchStemmingOnlyForGivenFields(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer(['unqualified'])
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertConversion(query, cql='honden')
    #     query = TermQuery(Term("field", "honden"))
    #     self.assertConversion(query, cql='field=honden')

    # def testIgnoreStemming(self):
    #     self._ignoredStemmingForWords = ['kate', 'wageningen']
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = TermQuery(Term("unqualified", "kate"))
    #     self.assertConversion(query, cql='kate')
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "katten")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "kat")), BooleanClause.Occur.SHOULD)
    #     self.assertConversion(query, cql='katten')
# NOTE(review): whitespace-mangled chunk — each physical line below packs many
# test methods that originally lived on separate lines; method definitions
# straddle every line break, so the code is kept byte-identical and only these
# comments were added.
# Covers: standard-analyzer phrase queries, diacritics handling with NFC
# normalization, index-relation/exact/boost/wildcard-prefix conversion,
# String/Int/Long range queries, hierarchical drilldown fields, numeric
# (Int/Long/Double) field queries, UnsupportedCQL for '<>', and the
# convert()/assertConversion() helpers that build a QueryExpressionToLuceneQueryDict
# from per-test fieldRegistry/unqualifiedFields/analyzer attributes.
# NOTE(review): Python 2 only — relies on `unicode`, `long` and the deprecated
# assertEquals alias; confirm before any Python 3 port.
def testPhraseQueryIsStandardAnalyzed(self): expected = dict(type="PhraseQuery", terms=[], boost=1.0) for term in ["vol.118", "2008", "nr.3", "march", "p.435-444"]: expected["terms"].append(dict(field="unqualified", value=term)) input = '"vol.118 (2008) nr.3 (March) p.435-444"' self.assertConversion(expected, cql=input) def testOneTermPhraseQueryUsesStandardAnalyzed(self): expected = dict(type="PhraseQuery", terms=[], boost=1.0) expected["terms"].append(dict(field="unqualified", value='aap')) expected["terms"].append(dict(field="unqualified", value='noot')) self.assertConversion(expected, cql='aap:noot') def testCreatesEmptyPhraseQueryIfNoValidCharsFound(self): expected = dict(type="PhraseQuery", terms=[], boost=1.0) self.assertConversion(expected, cql=':') def testStandardAnalyserWithoutStopWords(self): expected = dict(type="PhraseQuery", terms=[], boost=1.0) for term in ["no", "is", "the", "only", "option"]: expected["terms"].append(dict(field="unqualified", value=term)) self.assertConversion(expected, cql='"no is the only option"') def testDiacritics(self): expected = termQuery('title', 'moree') self.assertConversion(expected, cql='title=Moree') self.assertConversion(expected, cql='title=Morée') self.assertConversion(expected, cql='title=Morèe') # self._analyzer = MerescoDutchStemmingAnalyzer() # query = PhraseQuery() # query.add(Term("title", "waar")) # query.add(Term("title", "is")) # query.add(Term("title", "moree")) # query.add(Term("title", "vandaag")) # self.assertConversion(query, cql='title="Waar is Morée vandaag"') def testDiacriticsShouldBeNormalizedNFC(self): pq = dict(type="PhraseQuery", terms=[]) pq["terms"].append(dict(field="title", value="more")) pq["terms"].append(dict(field="title", value="e")) self.assertConversion(pq, cql='title=More\xcc\x81e') # Combined from unicodedata import normalize self.assertConversion(termQuery('title', 'moree'), cql=normalize('NFC', unicode('title=More\xcc\x81e'))) def testIndexRelationTermOutput(self):
self.assertConversion(termQuery('animal', 'cats'), cql='animal=cats') query = dict(type="PhraseQuery", terms=[]) query["terms"].append(dict(field="animal", value="cats")) query["terms"].append(dict(field="animal", value="dogs")) self.assertConversion(query, cql='animal="cats dogs"') self.assertConversion(query, cql='animal="catS Dogs"') def testIndexRelationExactTermOutput(self): self.assertConversion(termQuery("animal", "hairy cats"), cql='animal exact "hairy cats"') self.assertConversion(termQuery("animal", "Capital Cats"), cql='animal exact "Capital Cats"') def testBoost(self): query = termQuery("title", "cats", boost=2.0) self.assertConversion(query, cql="title =/boost=2.0 cats") def testWildcards(self): query = prefixQuery('unqualified', 'prefix', 1.0) self.assertConversion(query, cql='prefix*') self.assertConversion(query, cql='PREfix*') query = prefixQuery('field', 'prefix') self.assertConversion(query, cql='field="PREfix*"') self.assertConversion(query, cql='field=prefix*') query = prefixQuery('field', 'oc-0123') self.assertConversion(query, cql='field="oc-0123*"') query = termQuery('field', 'p') self.assertConversion(query, cql='field="P*"') #only prefix queries for now query = termQuery('field', 'post') self.assertConversion(query, cql='field="*post"') query = termQuery('field', 'prefix') self.assertConversion(query, cql='field=prefix**') self.unqualifiedFields = [("field0", 0.2), ("field1", 2.0)] query = dict(type="BooleanQuery", clauses=[]) query["clauses"].append(prefixQuery("field0", "prefix", 0.2)) query["clauses"][0]["occur"] = "SHOULD" query["clauses"].append(prefixQuery("field1", "prefix", 2.0)) query["clauses"][1]["occur"] = "SHOULD" self.assertConversion(query, cql="prefix*") def testMagicExact(self): exactResult = self.convert(cql='animal exact "cats dogs"') self.fieldRegistry = FieldRegistry() self.fieldRegistry.register('animal', STRINGFIELD) self.assertConversion(exactResult, cql='animal = "cats dogs"') def testTextRangeQuery(self): #
(field, lowerTerm, upperTerm, includeLower, includeUpper) q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm='value', upperTerm=None, includeLower=False, includeUpper=False) self.assertConversion(q, cql='field > value') q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm='value', upperTerm=None, includeLower=True, includeUpper=False) self.assertConversion(q, cql='field >= value') q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm=None, upperTerm='value', includeLower=False, includeUpper=False) self.assertConversion(q, cql='field < value') q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm=None, upperTerm='value', includeLower=False, includeUpper=True) self.assertConversion(q, cql='field <= value') def testIntRangeQuery(self): # (field, lowerTerm, upperTerm, includeLower, includeUpper) q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=1, upperTerm=None, includeLower=False, includeUpper=False) self.assertConversion(q, cql='intField > 1') q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=1, upperTerm=None, includeLower=True, includeUpper=False) self.assertConversion(q, cql='intField >= 1') q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=False) self.assertConversion(q, cql='intField < 3') q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=True) self.assertConversion(q, cql='intField <= 3') def testLongRangeQuery(self): # (field, lowerTerm, upperTerm, includeLower, includeUpper) q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=1, upperTerm=None, includeLower=False, includeUpper=False) self.assertConversion(q, cql='longField > 1') q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=1, upperTerm=None, includeLower=True, includeUpper=False)
self.assertConversion(q, cql='longField >= 1') q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=False) self.assertConversion(q, cql='longField < 3') q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=True) self.assertConversion(q, cql='longField <= 3') def testDrilldownFieldQuery(self): self.fieldRegistry = FieldRegistry([DrilldownField('field', hierarchical=True)]) self.assertConversion(dict(type="TermQuery", term=dict(field="field", path=["value"], type="DrillDown")), cql="field = value") self.assertConversion(dict(type="TermQuery", term=dict(field="field", path=["value", "value1"], type="DrillDown")), cql="field = \"value>value1\"") def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self): self.fieldRegistry = FieldRegistry() self.fieldRegistry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELD) self.unqualifiedFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)] expected = dict(type="PhraseQuery", terms=[ dict(field="unqualified", value="phrase"), dict(field="unqualified", value="query") ], boost=1.0) self.assertConversion(expected, cql='"phrase query"') def testQueryForIntField(self): expected = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True) self.assertConversion(expected, cql="intField=5") expected = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True) self.assertConversion(expected, cql="intField exact 5") def testQueryForLongField(self): expected = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=long(5), upperTerm=long(5), includeLower=True, includeUpper=True) self.assertConversion(expected, cql="longField=5") def testQueryForDoubleField(self): expected = dict(type="RangeQuery", rangeType="Double",
field='range.double.field', lowerTerm=float(5), upperTerm=float(5), includeLower=True, includeUpper=True) self.assertConversion(expected, cql="range.double.field=5") def testWildcardQuery(self): self.fieldRegistry = FieldRegistry() expected = dict(type="WildcardQuery", term=dict(field="field", value="???*")) self.assertConversion(expected, cql='field=???*') def testUnsupportedCQL(self): for relation in ['<>']: try: self.convert(cql='index %(relation)s term' % locals()) self.fail() except UnsupportedCQL: pass def convert(self, expression=None, cql=None): if expression is None: expression = cqlToExpression(parseCql(cql)) unqualifiedFields = getattr(self, 'unqualifiedFields', [("unqualified", 1.0)]) settings = LuceneSettings() if hasattr(self, '_analyzer'): settings.analyzer = self._analyzer if hasattr(self, 'fieldRegistry'): settings.fieldRegistry = self.fieldRegistry else: settings.fieldRegistry = FieldRegistry() settings.fieldRegistry.register("intField", fieldDefinition=INTFIELD) settings.fieldRegistry.register("longField", fieldDefinition=LONGFIELD) converter = QueryExpressionToLuceneQueryDict( unqualifiedTermFields=unqualifiedFields, luceneSettings=settings, ignoreStemmingForWords=getattr(self, '_ignoredStemmingForWords', None) ) return converter.convert(expression) def assertConversion(self, expected, expression=None, cql=None): result = self.convert(expression=expression, cql=cql) self.assertEquals(expected, result)
def testMagicExact(self):
    # Capture the query an explicit 'exact' relation produces first.
    expected = self.composer.compose(parseCql('animal exact "cats dogs"'))
    # Registering 'animal' as an untokenized StringField should make a
    # plain '=' relation produce that very same exact-match query.
    registry = FieldRegistry()
    registry.register('animal', StringField.TYPE_NOT_STORED)
    self.composer = LuceneQueryComposer(
        unqualifiedTermFields=[("unqualified", 1.0)],
        luceneSettings=LuceneSettings(fieldRegistry=registry))
    self.assertConversion(expected, 'animal = "cats dogs"')
def setUp(self):
    """Build a composer whose registry knows intField/longField as numeric."""
    super(LuceneQueryComposerTest, self).setUp()
    registry = FieldRegistry()
    for name, definition in (("intField", INTFIELD), ("longField", LONGFIELD)):
        registry.register(name, fieldDefinition=definition)
    self.composer = LuceneQueryComposer(
        unqualifiedTermFields=[("unqualified", 1.0)],
        luceneSettings=LuceneSettings(fieldRegistry=registry))
def testMagicExact(self):
    # Baseline: what 'exact' yields while 'animal' is still unregistered.
    expected = self.convert(cql='animal exact "cats dogs"')
    # Once 'animal' is registered as a STRINGFIELD, '=' converts to the
    # same query as 'exact' did.
    self.fieldRegistry = FieldRegistry()
    self.fieldRegistry.register('animal', STRINGFIELD)
    self.assertConversion(expected, cql='animal = "cats dogs"')
def testSortField(self):
    """Sort type and missing-value defaults follow the registered field type."""
    registry = FieldRegistry()
    registry.register("sorted.longfield", fieldDefinition=LONGFIELD)
    registry.register("sorted.intfield", fieldDefinition=INTFIELD)
    registry.register("sorted.stringfield", fieldDefinition=STRINGFIELD)
    for fieldname, sortType in (
            ("sorted.longfield", "Long"),
            ("sorted.intfield", "Int"),
            ("sorted.stringfield", "String")):
        self.assertEqual(sortType, registry.sortFieldType(fieldname))
    # Numeric sorts get no missing-value marker; string sorts get
    # STRING_FIRST/STRING_LAST depending on direction; 'score' gets none.
    self.assertEqual(None, registry.defaultMissingValueForSort("sorted.longfield", True))
    self.assertEqual(None, registry.defaultMissingValueForSort("sorted.intfield", True))
    self.assertEqual("STRING_FIRST", registry.defaultMissingValueForSort("sorted.stringfield", True))
    self.assertEqual("STRING_LAST", registry.defaultMissingValueForSort("sorted.stringfield", False))
    self.assertEqual(None, registry.defaultMissingValueForSort("score", False))
    # A created sort field carries the 'sort' flag.
    created = registry.createField('sorted.longfield', 'id:1')
    self.assertEqual(
        {'name': 'sorted.longfield', 'type': 'LongField', 'value': 'id:1', 'sort': True},
        created)
def testMagicExact(self):
    """'=' on a field registered as STRINGFIELD matches the 'exact' relation."""
    exactResult = self._convert('animal exact "cats dogs"')
    self.fieldRegistry = FieldRegistry()
    self.fieldRegistry.register('animal', STRINGFIELD)
    # Fix: assertEquals is a long-deprecated alias of assertEqual
    # (removed in Python 3.12); use the canonical name.
    self.assertEqual(exactResult, self._convert('animal = "cats dogs"'))
# NOTE(review): whitespace-mangled class — QueryExpressionToLuceneQueryDictTest.
# Method definitions straddle every physical line break below, so the code is
# kept byte-identical; only these comments were added.
# Tests conversion of QueryExpression trees into Lucene query dicts: term,
# boolean AND/OR/NOT (including nested NOT), phrase, unqualified-field
# expansion (single and multiple fields with boosts), range queries
# (String/Int/Long with </<=/>/>=/= semantics), drilldown fields, wildcard,
# per-query unqualifiedTermFields override, analyzed-away terms, and
# cross-core (RelationalLuceneQuery / JoinAndQuery) composition. The
# _convert/_prepareConverter/_prepareLuceneSettings/_makeExpression helpers
# at the end build the converter from per-test attributes (fieldRegistry,
# unqualifiedFields, _analyzer, _ignoredStemmingForWords).
# NOTE(review): Python 2 only (`unicode`, `long`, `basestring`, assertEquals);
# confirm before any Python 3 port.
class QueryExpressionToLuceneQueryDictTest(SeecrTestCase): def testTermQuery(self): self.assertEquals( { "type": "TermQuery", "term": { "field":"field", "value": "value", } }, self._convert(QueryExpression.searchterm("field", "=", "value"))) self.assertEquals( {"term": {"field": "field", "value": "value"}, "type": "TermQuery"}, self._convert(QueryExpression.searchterm("field", "=", "value"))) def testRightHandSideIsLowercase(self): self.assertEquals( {'boost': 1.0, 'term': {'field': 'unqualified', 'value': 'cat'}, 'type': 'TermQuery'}, self._convert(QueryExpression.searchterm(term="CaT"))) def testOneTermOutputWithANumber(self): self.assertEquals( {'boost': 1.0, 'term': {'field': 'unqualified', 'value': '2005'}, 'type': 'TermQuery'}, self._convert(QueryExpression.searchterm(term="2005"))) def testMatchAllQuery(self): self.assertEquals( {"type": "MatchAllDocsQuery"}, self._convert(QueryExpression.searchterm(term="*"))) def testUnqualifiedTermFields(self): self.unqualifiedFields = [('aField', 1.0)] self.assertEquals( {"type": "TermQuery", "term": {"field": "aField", "value": "value"}, 'boost': 1.0}, self._convert(QueryExpression.searchterm(term="value"))) def testUnqualifiedTermFieldsWithNestedExpression(self): self.unqualifiedFields = [('aField', 1.0)] expr = QueryExpression.nested(operator='AND') expr.operands = [ QueryExpression.searchterm(term="value1"), QueryExpression.searchterm(term="value2") ] self.assertEquals({ 'type': 'BooleanQuery', 'clauses': [ {'type': 'TermQuery', 'occur': 'MUST', 'term': {'field': 'aField', 'value': u'value1'}, 'boost': 1.0}, {'type': 'TermQuery', 'occur': 'MUST', 'term': {'field': 'aField', 'value': u'value2'}, 'boost': 1.0} ], }, self._convert(expr)) def testMultipleUnqualifiedTermFields(self): self.unqualifiedFields = [('aField', 1.0), ('oField', 2.0)] self.assertEquals( { "type": "BooleanQuery", "clauses": [ { "type": "TermQuery", "term": {"field": "aField", "value": "value"}, "boost": 1.0, "occur": "SHOULD" }, { "type":
"TermQuery", "term": {"field": "oField", "value": "value"}, "boost": 2.0, "occur": "SHOULD" } ] }, self._convert(QueryExpression.searchterm(term="value"))) def testBooleanAndQuery(self): expr = QueryExpression.nested(operator='AND') expr.operands = [ QueryExpression.searchterm("field1", "=", "value1"), QueryExpression.searchterm("field2", "=", "value2") ] self.assertEquals( { "type": "BooleanQuery", "clauses": [ { "type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "MUST" }, { "type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "MUST" } ] }, self._convert(expr)) def testBooleanOrQuery(self): expr = QueryExpression.nested(operator='OR') expr.operands=[ QueryExpression.searchterm("field1", "=", "value1"), QueryExpression.searchterm("field2", "=", "value2") ] self.assertEquals( { "type": "BooleanQuery", "clauses": [ { "type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "SHOULD" }, { "type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "SHOULD" } ] }, self._convert(expr)) def testBooleanNotQuery(self): expr = QueryExpression.nested(operator='AND') expr.operands=[ QueryExpression.searchterm("field1", "=", "value1"), QueryExpression.searchterm("field2", "=", "value2") ] expr.operands[1].must_not = True self.assertEquals( { "type": "BooleanQuery", "clauses": [ { "type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "MUST" }, { "type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "MUST_NOT" } ] }, self._convert(expr)) def testBooleanNotQueryNested(self): expr = QueryExpression.nested(operator='AND') nestedNotExpr = QueryExpression.nested(operator='AND') nestedNotExpr.must_not = True nestedNotExpr.operands = [ QueryExpression.searchterm("field2", "=", "value2"), QueryExpression.searchterm("field3", "=", "value3") ] expr.operands = [QueryExpression.searchterm("field1", "=", "value1"), nestedNotExpr] self.assertEquals( {
"type": "BooleanQuery", "clauses": [ { "type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "MUST" }, { "type": "BooleanQuery", "occur": "MUST_NOT", "clauses": [ { "type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "MUST" }, { "type": "TermQuery", "term": {"field": "field3", "value": "value3"}, "occur": "MUST" } ] } ] }, self._convert(expr)) def testNotExpression(self): expr = QueryExpression.searchterm("field", "=", "value") expr.must_not = True self.assertEquals( { "type": "BooleanQuery", "clauses": [ { "type": "MatchAllDocsQuery", "occur": "MUST" }, { "type": "TermQuery", "term": {"field": "field", "value": "value"}, "occur": "MUST_NOT" } ] }, self._convert(expr)) def testPhraseOutput(self): self.assertEquals( { "type": "PhraseQuery", "boost": 1.0, "terms": [ {"field": "unqualified", "value": "cats"}, {"field": "unqualified", "value": "dogs"} ] }, self._convert(QueryExpression.searchterm(term='"cats dogs"'))) # def testWhitespaceAnalyzer(self): # self._analyzer = WhitespaceAnalyzer() # query = PhraseQuery() # query.add(Term("unqualified", "kat")) # query.add(Term("unqualified", "hond")) # self.assertEquals(query, self._convert('"kat hond"')) # def testPhraseOutputDoesNoDutchStemming(self): # self._analyzer = MerescoDutchStemmingAnalyzer() # query = PhraseQuery() # query.add(Term("unqualified", "katten")) # query.add(Term("unqualified", "honden")) # self.assertEquals(query, self._convert('"katten honden"')) # def testDutchStemming(self): # self._analyzer = MerescoDutchStemmingAnalyzer() # query = BooleanQuery() # query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD) # query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD) # self.assertEquals(query, self._convert('honden')) # def testDutchStemmingOnlyForGivenFields(self): # self._analyzer = MerescoDutchStemmingAnalyzer(['unqualified']) # query = BooleanQuery() # query.add(TermQuery(Term("unqualified", "honden")),
BooleanClause.Occur.SHOULD) # query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD) # self.assertEquals(query, self._convert('honden')) # query = TermQuery(Term("field", "honden")) # self.assertEquals(query, self._convert('field=honden')) # def testIgnoreStemming(self): # self._ignoredStemmingForWords = ['kate', 'wageningen'] # self._analyzer = MerescoDutchStemmingAnalyzer() # query = TermQuery(Term("unqualified", "kate")) # self.assertEquals(query, 'kate') # query = BooleanQuery() # query.add(TermQuery(Term("unqualified", "katten")), BooleanClause.Occur.SHOULD) # query.add(TermQuery(Term("unqualified", "kat")), BooleanClause.Occur.SHOULD) # self.assertEquals(query, self._convert('katten')) def testPhraseQueryIsStandardAnalyzed(self): expected = dict(type="PhraseQuery", terms=[], boost=1.0) for term in ["vol.118", "2008", "nr.3", "march", "p.435-444"]: expected["terms"].append(dict(field="unqualified", value=term)) self.assertEquals(expected, self._convert('"vol.118 (2008) nr.3 (March) p.435-444"')) def testOneTermPhraseQueryUsesStandardAnalyzed(self): expected = dict(type="PhraseQuery", terms=[], boost=1.0) expected["terms"].append(dict(field="unqualified", value='aap')) expected["terms"].append(dict(field="unqualified", value='noot')) self.assertEquals(expected, self._convert('aap:noot')) def testCreatesEmptyPhraseQueryIfNoValidCharsFound(self): expected = dict(type="PhraseQuery", terms=[], boost=1.0) self.assertEquals(expected, self._convert(':')) def testStandardAnalyserWithoutStopWords(self): expected = dict(type="PhraseQuery", terms=[], boost=1.0) for term in ["no", "is", "the", "only", "option"]: expected["terms"].append(dict(field="unqualified", value=term)) self.assertEquals(expected, self._convert('"no is the only option"')) def testDiacritics(self): expected = termQuery('title', 'moree') self.assertEquals(expected, self._convert('title=Moree')) self.assertEquals(expected, self._convert('title=Morée')) self.assertEquals(expected,
self._convert('title=Morèe')) # self._analyzer = MerescoDutchStemmingAnalyzer() # query = PhraseQuery() # query.add(Term("title", "waar")) # query.add(Term("title", "is")) # query.add(Term("title", "moree")) # query.add(Term("title", "vandaag")) # self.assertEquals(query, self._convert('title="Waar is Morée vandaag"')) def testDiacriticsShouldBeNormalizedNFC(self): pq = dict(type="PhraseQuery", terms=[]) pq["terms"].append(dict(field="title", value="more")) pq["terms"].append(dict(field="title", value="e")) self.assertEquals(pq, self._convert('title=More\xcc\x81e')) # Combined from unicodedata import normalize self.assertEquals( termQuery('title', 'moree'), self._convert(normalize('NFC', unicode('title=More\xcc\x81e')))) def testIndexRelationTermOutput(self): self.assertEquals( termQuery('animal', 'cats'), self._convert('animal=cats')) query = dict(type="PhraseQuery", terms=[]) query["terms"].append(dict(field="animal", value="cats")) query["terms"].append(dict(field="animal", value="dogs")) self.assertEquals(query, self._convert('animal="cats dogs"')) self.assertEquals(query, self._convert('animal="catS Dogs"')) def testIndexRelationExactTermOutput(self): self.assertEquals( termQuery("animal", "hairy cats"), self._convert('animal exact "hairy cats"')) self.assertEquals( termQuery("animal", "Capital Cats"), self._convert('animal exact "Capital Cats"')) def testBoost(self): query = termQuery("title", "cats", boost=2.0) self.assertEquals(query, self._convert("title =/boost=2.0 cats")) def testWildcards(self): query = prefixQuery('unqualified', 'prefix', 1.0) self.assertEquals(query, self._convert('prefix*')) self.assertEquals(query, self._convert('PREfix*')) query = prefixQuery('field', 'prefix') self.assertEquals(query, self._convert('field="PREfix*"')) self.assertEquals(query, self._convert('field=prefix*')) query = prefixQuery('field', 'oc-0123') self.assertEquals(query, self._convert('field="oc-0123*"')) query = termQuery('field', 'p') self.assertEquals(query,
self._convert('field="P*"')) #only prefix queries for now query = termQuery('field', 'post') self.assertEquals(query, self._convert('field="*post"')) query = termQuery('field', 'prefix') self.assertEquals(query, self._convert('field=prefix**')) self.unqualifiedFields = [("field0", 0.2), ("field1", 2.0)] query = dict(type="BooleanQuery", clauses=[]) query["clauses"].append(prefixQuery("field0", "prefix", 0.2)) query["clauses"][0]["occur"] = "SHOULD" query["clauses"].append(prefixQuery("field1", "prefix", 2.0)) query["clauses"][1]["occur"] = "SHOULD" self.assertEquals(query, self._convert("prefix*")) def testMagicExact(self): exactResult = self._convert('animal exact "cats dogs"') self.fieldRegistry = FieldRegistry() self.fieldRegistry.register('animal', STRINGFIELD) self.assertEquals(exactResult, self._convert('animal = "cats dogs"')) def testTextRangeQuery(self): # (field, lowerTerm, upperTerm, includeLower, includeUpper) q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm='value', upperTerm=None, includeLower=False, includeUpper=True) self.assertEquals(q, self._convert('field > value')) q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm='value', upperTerm=None, includeLower=True, includeUpper=True) self.assertEquals(q, self._convert('field >= value')) q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm=None, upperTerm='value', includeLower=True, includeUpper=False) self.assertEquals(q, self._convert('field < value')) q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm=None, upperTerm='value', includeLower=True, includeUpper=True) self.assertEquals(q, self._convert('field <= value')) def testIntRangeQuery(self): # (field, lowerTerm, upperTerm, includeLower, includeUpper) q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=1, upperTerm=None, includeLower=False, includeUpper=True) self.assertEquals(q, self._convert('intField > 1')) q = dict(type="RangeQuery",
rangeType="Int", field='intField', lowerTerm=1, upperTerm=None, includeLower=True, includeUpper=True) self.assertEquals(q, self._convert('intField >= 1')) q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=None, upperTerm=3, includeLower=True, includeUpper=False) self.assertEquals(q, self._convert('intField < 3')) q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=None, upperTerm=3, includeLower=True, includeUpper=True) self.assertEquals(q, self._convert('intField <= 3')) q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=3, upperTerm=3, includeLower=True, includeUpper=True) self.assertEquals(q, self._convert('intField = 3')) self.assertEquals(q, self._convert(QueryExpression.searchterm(index='intField', relation='exact', term=3))) self.assertEquals(q, self._convert(QueryExpression.searchterm(index='intField', relation='=', term=3))) def testLongRangeQuery(self): # (field, lowerTerm, upperTerm, includeLower, includeUpper) q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=1, upperTerm=None, includeLower=False, includeUpper=True) self.assertEquals(q, self._convert('longField > 1')) q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=1, upperTerm=None, includeLower=True, includeUpper=True) self.assertEquals(q, self._convert('longField >= 1')) q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=None, upperTerm=3, includeLower=True, includeUpper=False) self.assertEquals(q, self._convert('longField < 3')) q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=None, upperTerm=3, includeLower=True, includeUpper=True) self.assertEquals(q, self._convert('longField <= 3')) def testDrilldownFieldQuery(self): self.fieldRegistry = FieldRegistry([DrilldownField('field', hierarchical=True)]) self.assertEquals( dict(type="TermQuery", term=dict(field="field", path=["value"], type="DrillDown")), self._convert("field = value"))
self.assertEquals( dict(type="TermQuery", term=dict(field="field", path=["value", "value1"], type="DrillDown")), self._convert("field = \"value>value1\"")) def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self): self.fieldRegistry = FieldRegistry() self.fieldRegistry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELD) self.unqualifiedFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)] expected = dict(type="PhraseQuery", terms=[ dict(field="unqualified", value="phrase"), dict(field="unqualified", value="query") ], boost=1.0) self.assertEquals(expected, self._convert('"phrase query"')) def testQueryForIntField(self): expected = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True) self.assertEquals(expected, self._convert("intField=5")) expected = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True) self.assertEquals(expected, self._convert("intField exact 5")) def testQueryForLongField(self): expected = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=long(5), upperTerm=long(5), includeLower=True, includeUpper=True) self.assertEquals(expected, self._convert("longField=5")) def testQueryForDoubleField(self): expected = dict(type="RangeQuery", rangeType="Double", field='range.double.field', lowerTerm=float(5), upperTerm=float(5), includeLower=True, includeUpper=True) self.assertEquals(expected, self._convert("range.double.field=5")) def testWildcardQuery(self): self.fieldRegistry = FieldRegistry() expected = dict(type="WildcardQuery", term=dict(field="field", value="???*")) self.assertEquals(expected, self._convert('field=???*')) def testUnsupportedCQL(self): for relation in ['<>']: try: self._convert('index %(relation)s term' % locals()) self.fail() except UnsupportedCQL: pass def testPerQueryUnqualifiedFields(self): self.unqualifiedFields = [('aField', 1.0)] converter
= self._prepareConverter() self.assertEquals({ "type": "BooleanQuery", "clauses": [{ "type": "TermQuery", "term": {"field": "aField", "value": "value"}, 'boost': 2.0, 'occur': 'SHOULD' }, { "type": "TermQuery", "term": {"field": "anotherField", "value": "value"}, 'boost': 3.0, 'occur': 'SHOULD' }]}, converter.convert( QueryExpression.searchterm(term="value"), unqualifiedTermFields=[('aField', 2.0), ('anotherField', 3.0)])) def testReallyIgnoreAnalyzedAwayTerms(self): self.assertEquals({'boost': 1.0, 'terms': [], 'type': 'PhraseQuery'}, self._convert('.')) # will not yield any results, but that's what's desired self.assertDictEquals({'terms': [], 'type': 'PhraseQuery'}, self._convert("abc=:;+")) self.assertDictEquals({'type': 'BooleanQuery', 'clauses': [{'boost': 1.0, 'term': {'field': 'unqualified', 'value': u'abc'}, 'type': 'TermQuery', 'occur': 'MUST'}, {'boost': 1.0, 'term': {'field': 'unqualified', 'value': u'def'}, 'type': 'TermQuery', 'occur': 'MUST'}]}, self._convert("abc AND :;+ AND def")) self.unqualifiedFields = [("unqualified", 1.0), ("moreUnqualified", 1.0)] self.assertDictEquals({ 'clauses': [{ 'clauses': [{ 'boost': 1.0, 'occur': 'SHOULD', 'term': {'field': 'unqualified', 'value': u'abc'}, 'type': 'TermQuery' }, { 'boost': 1.0, 'occur': 'SHOULD', 'term': {'field': 'moreUnqualified', 'value': u'abc'}, 'type': 'TermQuery' }], 'occur': 'MUST', 'type': 'BooleanQuery' }, { 'clauses': [{ 'boost': 1.0, 'occur': 'SHOULD', 'term': {'field': 'unqualified', 'value': u'def'}, 'type': 'TermQuery' }, { 'boost': 1.0, 'occur': 'SHOULD', 'term': {'field': 'moreUnqualified', 'value': u'def'}, 'type': 'TermQuery' }], 'occur': 'MUST', 'type': 'BooleanQuery' }], 'type': 'BooleanQuery'}, self._convert("abc AND :;+ AND def")) def testOtherCoreTermQuery(self): query = ComposedQuery('thisCore') query.cores.add('otherCore') query.addMatch( dict(core='thisCore', uniqueKey='A'), dict(core='otherCore', uniqueKey='B') ) self.assertEquals({ "type": "RelationalLuceneQuery", # should
this not be 'joined' to own core somehow? (with MatchAllDocs) "core": "otherCore", "collectKeyName": "B", "filterKeyName": "B", "query": { "type": "TermQuery", "term": { "field": "field", "value": "value", } }}, self._convert(QueryExpression.searchterm("otherCore.field", "=", "value"), composedQuery=query)) @skip('not yet implemented') def testOtherCoreAndQuery(self): self.assertEquals({ 'type': 'JoinAndQuery', 'first': { "type": "RelationalLuceneQuery", # should this not be 'joined' to own core somehow? "core": "thisCore", "collectKeyName": "A", # where does this keyName come from? "filterKeyName": "A", "query": { "type": "TermQuery", "term": { "field":"field0", "value": "value", } } }, 'second': { "type": "RelationalLuceneQuery", # should this not be 'joined' to own core somehow? "core": "otherCore", "collectKeyName": "A", # where does this keyName come from? "filterKeyName": "A", "query": { "type": "TermQuery", "term": { "field":"field", "value": "value", } } } }, self._convert( QueryExpression(operator='AND', operands=[ QueryExpression.searchterm('field0', '=', 'value'), QueryExpression.searchterm("otherCore.field", "=", "value") ]) ) ) def _convert(self, input, **kwargs): return self._prepareConverter().convert(self._makeExpression(input), **kwargs) def _prepareConverter(self): unqualifiedFields = getattr(self, 'unqualifiedFields', [("unqualified", 1.0)]) return QueryExpressionToLuceneQueryDict( unqualifiedTermFields=unqualifiedFields, luceneSettings=self._prepareLuceneSettings(), ignoreStemmingForWords=getattr(self, '_ignoredStemmingForWords', None) ) def _prepareLuceneSettings(self): settings = LuceneSettings() if hasattr(self, '_analyzer'): settings.analyzer = self._analyzer if hasattr(self, 'fieldRegistry'): settings.fieldRegistry = self.fieldRegistry else: settings.fieldRegistry = FieldRegistry() settings.fieldRegistry.register("intField", fieldDefinition=INTFIELD) settings.fieldRegistry.register("longField", fieldDefinition=LONGFIELD) return settings def
_makeExpression(self, input): return cqlToExpression(parseCql(input)) if isinstance(input, basestring) else input
def testPhraseQueryPossible(self):
    # A field registered without term frequencies cannot back a phrase query.
    fieldRegistry = FieldRegistry()
    fieldRegistry.register('fieldname', NO_TERMS_FREQUENCY_FIELD)
    self.assertFalse(fieldRegistry.phraseQueryPossible('fieldname'))
    # Unregistered fields keep the default, phrase-capable field type.
    self.assertTrue(fieldRegistry.phraseQueryPossible('other.fieldname'))
def testIsIndexField(self):
    # field2/field3 are drilldown fields; field1/field2 also carry term vectors.
    fieldRegistry = FieldRegistry(
        drilldownFields=[DrilldownField(name) for name in ('field2', 'field3')],
        termVectorFields=['field1', 'field2'])
    # Everything is indexed except a pure drilldown field (field3).
    for name in ('field1', 'field2', 'field4'):
        self.assertTrue(fieldRegistry.isIndexField(name))
    self.assertFalse(fieldRegistry.isIndexField('field3'))
# NOTE(review): whitespace-mangled module prelude, kept byte-identical.
# Imports plus module-level setup for the ErfGeo enrichment index: a
# FieldRegistry seeded with the project's drilldown fields and with
# 'dcterms:spatial.geo:long'/'geo:lat' registered as DOUBLEFIELD. The
# createErfGeoEnrichmentPeriodicDownloadHelix factory at the end continues
# past this chunk; it builds a PeriodicDownload against 127.0.0.1 on the
# configured erfgeoEnrich port — presumably the enrichment service runs
# locally; TODO confirm.
from digitalecollectie.erfgeo import VERSION_STRING from digitalecollectie.erfgeo.namespaces import namespaces from digitalecollectie.erfgeo.maybecombinewithsummary import COMBINED_METADATA_PREFIX from digitalecollectie.erfgeo.index.constants import ALL_FIELD from digitalecollectie.erfgeo.index.lxmltofieldslist import LxmlToFieldsList from digitalecollectie.erfgeo.index.fieldslisttolucenedocument import FieldsListToLuceneDocument from digitalecollectie.erfgeo.index.indexfields import IndexFields workingPath = dirname(abspath(__file__)) unqualifiedTermFields = [(ALL_FIELD, 1.0)] fieldRegistry = FieldRegistry(drilldownFields=IndexFields.drilldownFields) fieldRegistry.register('dcterms:spatial.geo:long', fieldDefinition=DOUBLEFIELD) fieldRegistry.register('dcterms:spatial.geo:lat', fieldDefinition=DOUBLEFIELD) parseHugeOptions = dict(huge_tree=True, remove_blank_text=True) def createErfGeoEnrichmentPeriodicDownloadHelix(reactor, lucene, config, statePath): erfgeoEnrichPortNumber = int(config['erfgeoEnrich.portNumber']) downloadName = 'erfgeoEnrich-%s' % COMBINED_METADATA_PREFIX erfGeoEnrichPeriodicDownload = PeriodicDownload( reactor, host='127.0.0.1', port=erfgeoEnrichPortNumber, name=downloadName, autoStart=True)
def testWildcardQuery(self):
    """'???*' must survive analysis intact and convert to a WildcardQuery."""
    self.fieldRegistry = FieldRegistry()
    expected = dict(type="WildcardQuery", term=dict(field="field", value="???*"))
    # Fix: assertEquals is a long-deprecated alias of assertEqual
    # (removed in Python 3.12); use the canonical name.
    self.assertEqual(expected, self._convert('field=???*'))