Exemplo n.º 1
0
 def testSpecificField(self):
     """Check that registering TYPE_STORED for a name makes its fields stored."""
     fieldRegistry = FieldRegistry()

     # Before any registration the created field is expected to be unstored.
     unregistered = fieldRegistry.createField('fieldname', 'value')
     self.assertFalse(unregistered.fieldType().stored())

     # Fields created after the registration must report stored().
     fieldRegistry.register('fieldname', StringField.TYPE_STORED)
     registered = fieldRegistry.createField('fieldname', 'value')
     self.assertTrue(registered.fieldType().stored())
Exemplo n.º 2
0
 def testDefault(self):
     """Check the field type an unconfigured registry gives to '__id__'."""
     fieldRegistry = FieldRegistry()
     idField = fieldRegistry.createField('__id__', 'id:1')

     # Expected flags: not tokenized, but stored and indexed.
     self.assertFalse(idField.fieldType().tokenized())
     self.assertTrue(idField.fieldType().stored())
     self.assertTrue(idField.fieldType().indexed())

     # The registry itself must also report the name as untokenized.
     self.assertTrue(fieldRegistry.isUntokenized('__id__'))
Exemplo n.º 3
0
 def testAddWithoutIdentifier(self):
     """Check the request posted when addDocument is called with fields only."""
     idField = FieldRegistry().createField("id", "id1")
     consume(self._lucene.addDocument(fields=[idField]))

     # Exactly one request is expected; inspect it only after that is verified.
     self.assertEqual(1, len(self.post))
     request = self.post[0]
     self.assertEqual('/lucene/update/?', request['path'])
     self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', request['data'])
Exemplo n.º 4
0
 def testIsUntokenized(self):
     """Check isUntokenized for an 'untokenized.' name and for registered types."""
     fieldRegistry = FieldRegistry()
     self.assertTrue(fieldRegistry.isUntokenized('untokenized.some.field'))

     # The same name is registered twice; each time the answer must follow
     # the type registered last.
     expectations = (
         (StringField.TYPE_NOT_STORED, self.assertTrue),
         (TextField.TYPE_NOT_STORED, self.assertFalse),
     )
     for fieldType, check in expectations:
         fieldRegistry.register('fieldname', fieldType)
         check(fieldRegistry.isUntokenized('fieldname'))
Exemplo n.º 5
0
 def __init__(
     self,
     commitTimeout=10,
     commitCount=100000,
     multithreaded=True,
     readonly=False,
     lruTaxonomyWriterCacheSize=4000,
     analyzer=MerescoStandardAnalyzer(),
     similarity=BM25Similarity(),
     fieldRegistry=None,
     maxMergeAtOnce=2,
     segmentsPerTier=8.0,
     numberOfConcurrentTasks=6,
     verbose=True,
 ):
     """Store every setting as an attribute with the same name as its argument.

     fieldRegistry -- registry to use; when omitted (None) a new FieldRegistry
         is created for this instance. A default written as
         ``fieldRegistry=FieldRegistry()`` would be built once, when the
         function is defined, and then shared by every instance, so a
         register() call on one instance would affect all others.

     NOTE: the analyzer and similarity defaults are still single objects
     created at definition time and shared between instances.
     """
     if fieldRegistry is None:
         fieldRegistry = FieldRegistry()
     self.commitTimeout = commitTimeout
     self.commitCount = commitCount
     self.multithreaded = multithreaded
     self.readonly = readonly
     self.lruTaxonomyWriterCacheSize = lruTaxonomyWriterCacheSize
     self.analyzer = analyzer
     self.similarity = similarity
     self.fieldRegistry = fieldRegistry
     self.maxMergeAtOnce = maxMergeAtOnce
     self.segmentsPerTier = segmentsPerTier
     self.numberOfConcurrentTasks = numberOfConcurrentTasks
     self.verbose = verbose
Exemplo n.º 6
0
 def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
     """Check that a phrase query only targets the 'unqualified' field.

     'noTermFreqField' is registered with NO_TERMS_FREQUENCY_FIELDTYPE and is
     expected to be left out of the converted phrase query.
     """
     registry = FieldRegistry()
     registry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELDTYPE)
     settings = LuceneSettings(fieldRegistry=registry)
     unqualifiedTermFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
     self.composer = LuceneQueryComposer(unqualifiedTermFields=unqualifiedTermFields, luceneSettings=settings)

     phraseQuery = PhraseQuery()
     phraseQuery.add(Term("unqualified", "phrase query"))
     self.assertConversion(phraseQuery, '"phrase query"')
Exemplo n.º 7
0
 def testGenericDrilldownFields(self):
     """Check that isDrilldownField follows the supplied predicate function."""
     def hasDrilldownPrefix(name):
         return name.startswith('drilldown')

     # Warnings raised while the registry is built and queried are silenced.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         registry = FieldRegistry(isDrilldownFieldFunction=hasDrilldownPrefix)
         for drilldownName in ('drilldown.aap', 'drilldown.noot'):
             self.assertTrue(registry.isDrilldownField(drilldownName))
         self.assertFalse(registry.isDrilldownField('noot'))
Exemplo n.º 8
0
 def testTermVectorsForField(self):
     """Check that termVectorFields are flagged and created with termVectors.

     'field1' and 'field2' are configured as term vector fields; 'field3' is
     not and must be created without a "termVectors" entry.
     """
     registry = FieldRegistry(termVectorFields=['field1', 'field2'])
     self.assertTrue(registry.isTermVectorField('field1'))
     self.assertTrue(registry.isTermVectorField('field2'))
     self.assertFalse(registry.isTermVectorField('field3'))
     # assertEqual is used instead of the deprecated assertEquals alias.
     field = registry.createField('field1', 'id:1')
     self.assertEqual({
             "type": "TextField",
             "name": "field1",
             "value": "id:1",
             "termVectors": True,
         }, field)
     field = registry.createField('field2', 'id:1')
     self.assertEqual({
             "type": "TextField",
             "name": "field2",
             "value": "id:1",
             "termVectors": True,
         }, field)
     field = registry.createField('field3', 'id:1')
     self.assertEqual({
             "type": "TextField",
             "name": "field3",
             "value": "id:1",
         }, field)
Exemplo n.º 9
0
 def testNoTermsFreqField(self):
     """Check the field created for a name registered as NO_TERMS_FREQUENCY_FIELD."""
     registry = FieldRegistry()
     registry.register('fieldname', NO_TERMS_FREQUENCY_FIELD)
     field = registry.createField('fieldname', 'value')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual({
             "type": "NoTermsFrequencyField",
             "name": "fieldname",
             "value": "value",
         }, field)
Exemplo n.º 10
0
 def testDefault(self):
     """Check the field an unconfigured registry creates for '__id__'."""
     registry = FieldRegistry()
     field = registry.createField('__id__', 'id:1')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual({
             "type": "StringField",
             "name": "__id__",
             "value": "id:1",
             "stored": True
         }, field)
Exemplo n.º 11
0
 def testNumericField(self):
     """Check the field created for a name registered as NUMERICFIELD."""
     registry = FieldRegistry()
     registry.register('fieldname', NUMERICFIELD)
     field = registry.createField('fieldname', 2010)
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual({
             "type": "NumericField",
             "name": "fieldname",
             "value": 2010,
         }, field)
Exemplo n.º 12
0
 def testIsUntokenized(self):
     """Check isUntokenized for drilldown, prefixed, plain and registered names."""
     drilldown = DrilldownField('aDrilldownField')
     fieldRegistry = FieldRegistry(drilldownFields=[drilldown])

     for untokenizedName in ('aDrilldownField', 'untokenized.some.field'):
         self.assertTrue(fieldRegistry.isUntokenized(untokenizedName))
     self.assertFalse(fieldRegistry.isUntokenized('other.field'))

     # The same name is registered twice; each time the answer must follow
     # the definition registered last.
     expectations = (
         (STRINGFIELD, self.assertTrue),
         (TEXTFIELD, self.assertFalse),
     )
     for definition, check in expectations:
         fieldRegistry.register('fieldname', definition)
         check(fieldRegistry.isUntokenized('fieldname'))
Exemplo n.º 13
0
 def testIsNumeric(self):
     """Check isNumeric for registered numeric fields and for name prefixes."""
     fieldRegistry = FieldRegistry()
     for name, definition in (("longfield", LONGFIELD), ("intfield", INTFIELD)):
         fieldRegistry.register(name, fieldDefinition=definition)

     self.assertFalse(fieldRegistry.isNumeric('field1'))
     # Two registered names plus one name that is only recognisable by prefix.
     for numericName in ('longfield', 'intfield', 'range.double.afield'):
         self.assertTrue(fieldRegistry.isNumeric(numericName))
     self.assertFalse(fieldRegistry.isNumeric('__key__.field1'))
Exemplo n.º 14
0
    def testDefaultDefinition(self):
        """Check the field definition used for names that were never registered.

        Without arguments the registry creates a tokenized TextField; with
        defaultDefinition=STRINGFIELD it creates an untokenized StringField.
        """
        # assertEqual is used instead of the deprecated assertEquals alias.
        registry = FieldRegistry()
        field = registry.createField('aField', 'id:1')
        self.assertEqual({
                "type": "TextField",
                "name": "aField",
                "value": "id:1",
            }, field)
        self.assertFalse(registry.isUntokenized('aField'))

        registry = FieldRegistry(defaultDefinition=STRINGFIELD)
        field = registry.createField('aField', 'id:1')
        self.assertEqual({
                "type": "StringField",
                "name": "aField",
                "value": "id:1",
            }, field)
        self.assertTrue(registry.isUntokenized('aField'))
Exemplo n.º 15
0
def luceneAndReaderConfig(defaultLuceneSettings, httpRequestAdapter,
                          lucenePort):
    """Build the component tree for the default core.

    A Lucene component for DEFAULT_CORE on 127.0.0.1:lucenePort is created
    with a clone of defaultLuceneSettings that carries a FieldRegistry built
    from `drilldownFields` (a name resolved outside this function).
    httpRequestAdapter is placed directly below it, and the result of be()
    is returned.
    """
    registry = FieldRegistry(drilldownFields=drilldownFields)
    coreSettings = defaultLuceneSettings.clone(fieldRegistry=registry)
    lucene = Lucene(
        host='127.0.0.1',
        port=lucenePort,
        name=DEFAULT_CORE,
        settings=coreSettings,
    )
    return be((lucene, (httpRequestAdapter, )))
    def testAddDocument(self):
        """Check that commit sends one addDocument carrying the transaction's id."""
        fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
        observer = CallTrace()
        fields2LuceneDoc.addObserver(observer)
        fields2LuceneDoc.ctx.tx = Transaction('tsname')
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        fields2LuceneDoc.addField('field', 'value')
        consume(fields2LuceneDoc.commit('unused'))

        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(['addDocument'], observer.calledMethodNames())
        self.assertEqual('identifier', observer.calledMethods[0].kwargs['identifier'])
Exemplo n.º 17
0
 def setUp(self, fieldRegistry=None):
     """Create the Lucene instance, reactor stub and observer used by the tests.

     fieldRegistry -- registry handed to LuceneSettings; when omitted (None) a
         new FieldRegistry is created on every call. A default written as
         ``fieldRegistry=FieldRegistry()`` would be built once, when the
         function is defined, so registrations made during one test would
         still be present in every later test.
     """
     if fieldRegistry is None:
         fieldRegistry = FieldRegistry()
     super(LuceneTestCase, self).setUp()
     self._javaObjects = self._getJavaObjects()
     # The reactor stub hands out a CallTrace timer for every addTimer call.
     self._reactor = CallTrace('reactor', methods={'addTimer': lambda seconds, callback: CallTrace('timer')})
     self._defaultSettings = LuceneSettings(commitCount=1, commitTimeout=1, fieldRegistry=fieldRegistry)
     self.lucene = Lucene(
         join(self.tempdir, 'lucene'),
         reactor=self._reactor,
         settings=self._defaultSettings,
     )
     self.observer = CallTrace()
     self.lucene.addObserver(self.observer)
Exemplo n.º 18
0
 def testReuseCreatedField(self):
     """Check that mayReUse=True returns the earlier field with its value updated.

     With mayReUse=False the created field must compare unequal to the
     reused one.
     """
     # assertEqual is used instead of the deprecated assertEquals alias.
     registry = FieldRegistry()
     field = registry.createField('fieldname', 'value')
     self.assertEqual("value", field.stringValue())
     newField = registry.createField('fieldname', 'newvalue', mayReUse=True)
     self.assertEqual("newvalue", newField.stringValue())
     self.assertEqual(field, newField)
     newField2 = registry.createField('fieldname',
                                      'newvalue',
                                      mayReUse=False)
     self.assertEqual("newvalue", newField2.stringValue())
     self.assertNotEqual(newField, newField2)
 def _prepareLuceneSettings(self):
     """Return LuceneSettings built from the optional attributes of this test.

     self._analyzer and self.fieldRegistry are used when they exist. Without
     self.fieldRegistry a new FieldRegistry is installed on which "intField"
     and "longField" are registered.
     """
     luceneSettings = LuceneSettings()
     if hasattr(self, '_analyzer'):
         luceneSettings.analyzer = self._analyzer

     if hasattr(self, 'fieldRegistry'):
         luceneSettings.fieldRegistry = self.fieldRegistry
         return luceneSettings

     luceneSettings.fieldRegistry = FieldRegistry()
     numericDefinitions = (("intField", INTFIELD), ("longField", LONGFIELD))
     for name, definition in numericDefinitions:
         luceneSettings.fieldRegistry.register(name, fieldDefinition=definition)
     return luceneSettings
 def testDrilldownFieldQuery(self):
     """Check the query produced for a hierarchical drilldown field.

     A plain value gives a single-element path; the quoted value
     "value>value1" is expected as the two-element path ["value", "value1"].
     """
     self.fieldRegistry = FieldRegistry(
         [DrilldownField('field', hierarchical=True)])
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(
         dict(type="TermQuery",
              term=dict(field="field", path=["value"], type="DrillDown")),
         self._convert("field = value"))
     self.assertEqual(
         dict(type="TermQuery",
              term=dict(field="field",
                        path=["value", "value1"],
                        type="DrillDown")),
         self._convert("field = \"value>value1\""))
Exemplo n.º 21
0
    def testCreateDocument(self):
        """Check the Lucene fields that _createDocument builds per name prefix.

        Covers a plain field, a multi-valued field and the 'sorted.',
        'untokenized.', '__key__.' and '__numeric__.' prefixes. The observer
        answers numerateTerm with 1, which is the value expected for the
        '__key__.' field.
        """
        fields = {
            'field1': ['value1'],
            'field2': ['value2', 'value2.1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            '__key__.field5': ["12345"],
            '__numeric__.field6': ["12345"],
        }
        fields2LuceneDoc = Fields2LuceneDoc('tsname',
                                            fieldRegistry=FieldRegistry())
        observer = CallTrace(returnValues={'numerateTerm': 1})
        fields2LuceneDoc.addObserver(observer)
        document = fields2LuceneDoc._createDocument(fields)
        # assertEqual is used instead of the deprecated assertEquals alias.
        # Names are compared as sets, so the field order does not matter here.
        self.assertEqual(
            set([
                'field1', 'field2', 'sorted.field3', 'untokenized.field4',
                '__key__.field5', '__numeric__.field6'
            ]), set([f.name() for f in document.getFields()]))

        field1 = document.getField("field1")
        self.assertEqual('value1', field1.stringValue())
        self.assertTrue(field1.fieldType().indexed())
        self.assertFalse(field1.fieldType().stored())
        self.assertTrue(field1.fieldType().tokenized())

        self.assertEqual(['value2', 'value2.1'], document.getValues('field2'))

        field3 = document.getField("sorted.field3")
        self.assertEqual('value3', field3.stringValue())
        self.assertTrue(field3.fieldType().indexed())
        self.assertFalse(field3.fieldType().stored())
        self.assertFalse(field3.fieldType().tokenized())

        field4 = document.getField("untokenized.field4")
        self.assertEqual('value4', field4.stringValue())
        self.assertTrue(field4.fieldType().indexed())
        self.assertFalse(field4.fieldType().stored())
        self.assertFalse(field4.fieldType().tokenized())

        field5 = document.getField("__key__.field5")
        self.assertEqual(1, field5.numericValue().longValue())
        self.assertFalse(field5.fieldType().indexed())
        self.assertFalse(field5.fieldType().stored())
        self.assertTrue(field5.fieldType().tokenized())

        field6 = document.getField("__numeric__.field6")
        self.assertEqual(12345, field6.numericValue().longValue())
        self.assertFalse(field6.fieldType().indexed())
        self.assertFalse(field6.fieldType().stored())
        self.assertTrue(field6.fieldType().tokenized())
 def testOnlyOneSortValueAllowed(self):
     """Check that only the first value added to a 'sorted.' field is kept."""
     fields2LuceneDoc = Fields2LuceneDoc('tsname',
         fieldRegistry=FieldRegistry()
     )
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     fields2LuceneDoc.addField('sorted.field', 'value1')
     fields2LuceneDoc.addField('sorted.field', 'value2')
     consume(fields2LuceneDoc.commit('unused'))
     fields = observer.calledMethods[0].kwargs['fields']
     # assertEqual is used throughout instead of the deprecated assertEquals alias.
     self.assertEqual(1, len(fields))
     self.assertEqual({'sort': True, 'type': 'StringField', 'name': 'sorted.field', 'value': 'value1'}, fields[0])
    def testCreateDocument(self):
        """Check the field dicts that _createFields builds per name prefix.

        The comparison ignores the order in which different field names
        appear, because the input is a dict and its iteration order is not
        something this test should depend on. The relative order of the two
        'field2' values is still verified.
        """
        inputFields = {
            'field1': ['value1'],
            'field2': ['value2', 'value2.1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            '__key__.field5': [12345],
            '__numeric__.field6': [12345],
        }
        fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
        createdFields = fields2LuceneDoc._createFields(inputFields)

        expectedFields = [
                {
                    "name": "field2",
                    "type": "TextField",
                    "value": "value2"
                },
                {
                    "name": "field2",
                    "type": "TextField",
                    "value": "value2.1"
                },
                {
                    "name": "__key__.field5",
                    "type": "KeyField",
                    "value": 12345
                },
                {
                    "name": "field1",
                    "type": "TextField",
                    "value": "value1"
                },
                {
                    "name": "sorted.field3",
                    "type": "StringField",
                    "value": "value3",
                    "sort": True,
                },
                {
                    "name": "__numeric__.field6",
                    "type": "NumericField",
                    "value": 12345
                },
                {
                    "name": "untokenized.field4",
                    "type": "StringField",
                    "value": "value4"
                }
            ]

        def byName(field):
            return field['name']

        # sorted() is stable: sorting on the name alone keeps same-named
        # entries in their original relative order.
        self.assertEqual(sorted(expectedFields, key=byName),
                         sorted(createdFields, key=byName))
Exemplo n.º 24
0
    def testCreateFacet(self):
        """Check which added values end up as FacetFields in the document.

        Names configured as DrilldownField are expected only as facet fields;
        the other names are expected as ordinary fields. Both comparisons
        are made on sorted lists, because the values are added while iterating
        a dict and that iteration order is not something this test should
        depend on.
        """
        fields = {
            'field1': ['value1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            'untokenized.field5': ['value5', 'value6'],
            'untokenized.field6': ['value5/value6'],
            'untokenized.field7': ['valuex'],
            'untokenized.field8': [['grandparent', 'parent', 'child'],
                                   ['parent2', 'child']]
        }
        fields2LuceneDoc = Fields2LuceneDoc(
            'tsname',
            fieldRegistry=FieldRegistry(drilldownFields=[
                DrilldownField('untokenized.field4'),
                DrilldownField('untokenized.field5'),
                DrilldownField('untokenized.field6'),
                DrilldownField('untokenized.field8', hierarchical=True),
            ]))
        observer = CallTrace()
        fields2LuceneDoc.addObserver(observer)
        fields2LuceneDoc.ctx.tx = Transaction('tsname')
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        for field, values in fields.items():
            for value in values:
                fields2LuceneDoc.addField(field, value)

        consume(fields2LuceneDoc.commit('unused'))

        document = observer.calledMethods[0].kwargs['document']
        searchFields = [
            f for f in document.getFields() if not FacetField.instance_(f)
        ]
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(['field1', 'sorted.field3', 'untokenized.field7'],
                         sorted(f.name() for f in searchFields))

        facetsFields = [
            FacetField.cast_(f) for f in document.getFields()
            if FacetField.instance_(f)
        ]
        self.assertEqual(6, len(facetsFields))
        expectedFacets = [
            ('untokenized.field8', ['grandparent', 'parent', 'child']),
            ('untokenized.field8', ['parent2', 'child']),
            ('untokenized.field6', ['value5/value6']),
            ('untokenized.field4', ['value4']),
            ('untokenized.field5', ['value5']),
            ('untokenized.field5', ['value6']),
        ]
        # Note: a FacetField doesn't have a name, so dim and path are compared.
        self.assertEqual(sorted(expectedFacets),
                         sorted((f.dim, list(f.path)) for f in facetsFields))
 def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(
         self):
     """Check that a phrase query only targets the 'unqualified' field.

     'noTermFreqField' is registered as NO_TERMS_FREQUENCY_FIELD and is
     expected to be absent from the converted phrase query.
     """
     self.fieldRegistry = FieldRegistry()
     self.fieldRegistry.register('noTermFreqField',
                                 NO_TERMS_FREQUENCY_FIELD)
     self.unqualifiedFields = [("unqualified", 1.0),
                               ('noTermFreqField', 2.0)]
     expected = dict(type="PhraseQuery",
                     terms=[
                         dict(field="unqualified", value="phrase"),
                         dict(field="unqualified", value="query")
                     ],
                     boost=1.0)
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(expected, self._convert('"phrase query"'))
Exemplo n.º 26
0
 def setUp(self):
     """Wire a MultiLucene and Lucene whose posts are recorded, not sent.

     Each intercepted post is appended to self.post as a dict with 'data' and
     'path'; self.response is what the fake post hands back.
     """
     SeecrTestCase.setUp(self)
     self.post = []
     self.response = ""
     self.registry = FieldRegistry()
     self._multiLucene = MultiLucene(defaultCore='coreA', host="localhost", port=12345)
     self._lucene = Lucene(host="localhost", port=12345, settings=LuceneSettings(), name='coreA')
     self._multiLucene.addObserver(self._lucene)

     def fakePost(data, path, **kwargs):
         self.post.append(dict(data=data, path=path))
         raise StopIteration(self.response)
         yield  # never reached; its presence makes fakePost a generator function

     # Always hand out the same connection object, with _post replaced.
     connection = self._multiLucene._connect()
     connection._post = fakePost
     self._multiLucene._connect = lambda: connection
 def testAddFacetField(self):
     """Check that addFacetField yields exactly one field dict with a "path" key."""
     fields2LuceneDoc = Fields2LuceneDoc('tsname',
         fieldRegistry=FieldRegistry(drilldownFields=[
             DrilldownField('untokenized.field'),
         ])
     )
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     fields2LuceneDoc.addField('field', 'value')
     fields2LuceneDoc.addFacetField('untokenized.field', 'untokenized value')
     consume(fields2LuceneDoc.commit('unused'))
     fields = observer.calledMethods[0].kwargs['fields']
     # Facet fields are recognised here by the presence of a "path" key.
     facetsFields = [f for f in fields if "path" in f]
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(1, len(facetsFields))
Exemplo n.º 28
0
 def testSpecificField(self):
     """Check that registering STRINGFIELD_STORED changes the created field.

     Before the registration a TextField is expected; afterwards a StringField
     with "stored" set to True.
     """
     # assertEqual is used instead of the deprecated assertEquals alias.
     registry = FieldRegistry()
     field = registry.createField('fieldname', 'value')
     self.assertEqual({
             "type": "TextField",
             "name": "fieldname",
             "value": "value",
         }, field)
     registry.register('fieldname', STRINGFIELD_STORED)
     field = registry.createField('fieldname', 'value')
     self.assertEqual({
             "type": "StringField",
             "name": "fieldname",
             "value": "value",
             "stored": True
         }, field)
Exemplo n.º 29
0
    def testRangeQueryAndType(self):
        """Check the (query name, Python type) pair returned per field name."""
        fieldRegistry = FieldRegistry()
        fieldRegistry.register("longfield", fieldDefinition=LONGFIELD)
        fieldRegistry.register("intfield", fieldDefinition=INTFIELD)

        # (field name, expected query name, expected type); the last two names
        # are not registered.
        expectations = [
            ('longfield', "Long", long),
            ('intfield', "Int", int),
            ('range.double.field', "Double", float),
            ('anyfield', "String", str),
        ]
        for fieldname, expectedQuery, expectedType in expectations:
            queryName, valueType = fieldRegistry.rangeQueryAndType(fieldname)
            self.assertEqual(expectedQuery, queryName)
            self.assertEqual(expectedType, valueType)
Exemplo n.º 30
0
 def __init__(
     self,
     commitTimeout=10,
     commitCount=100000,
     lruTaxonomyWriterCacheSize=4000,
     analyzer=dict(type="MerescoStandardAnalyzer"),
     similarity=dict(type="BM25Similarity"),
     mergePolicy=dict(type="TieredMergePolicy",
                      maxMergeAtOnce=2,
                      segmentsPerTier=8.0),
     fieldRegistry=None,
     numberOfConcurrentTasks=6,
     cacheFacetOrdinals=True,
     verbose=True,
 ):
     """Copy the settings named in SETTING_NAMES onto the instance.

     Each name in SETTING_NAMES is stored as the attribute '_' + name, taken
     from the argument with that name. fieldRegistry is additionally
     assigned to self.fieldRegistry.

     fieldRegistry -- registry to use; when omitted (None) a new FieldRegistry
         is created for this instance. A default written as
         ``fieldRegistry=FieldRegistry()`` would be built once, when the
         function is defined, and then shared by every instance.

     NOTE: the analyzer, similarity and mergePolicy defaults are still single
     dicts created at definition time; they must not be mutated in place.
     """
     # Resolve the default before locals() is captured, so the snapshot (and
     # any '_fieldRegistry' copied from it) holds the real registry, not None.
     if fieldRegistry is None:
         fieldRegistry = FieldRegistry()
     local = locals()
     for name in SETTING_NAMES:
         self.__dict__['_' + name] = local[name]
     self.fieldRegistry = fieldRegistry