def testAddDocument(self):
        """Committing one added field results in a single addDocument call.

        The 'identifier' keyword argument of that call must equal the 'id'
        value stored in the transaction locals.
        """
        fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
        observer = CallTrace()
        fields2LuceneDoc.addObserver(observer)
        fields2LuceneDoc.ctx.tx = Transaction('tsname')
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        fields2LuceneDoc.addField('field', 'value')
        # commit() is driven through consume(); its argument is not inspected here.
        consume(fields2LuceneDoc.commit('unused'))

        # assertEqual instead of the deprecated assertEquals alias.
        self.assertEqual(['addDocument'], observer.calledMethodNames())
        self.assertEqual('identifier', observer.calledMethods[0].kwargs['identifier'])
    def testCreateDocument(self):
        """_createDocument builds a document holding one field per input value.

        Checks the resulting field names and, per field, the string or
        numeric value together with the indexed/stored/tokenized flags of
        its field type.
        """
        fields = {
            'field1': ['value1'],
            'field2': ['value2', 'value2.1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            '__key__.field5': ["12345"],
            '__numeric__.field6': ["12345"],
        }
        fields2LuceneDoc = Fields2LuceneDoc('tsname',
                                            fieldRegistry=FieldRegistry())
        # The observer answers every numerateTerm call with 1.
        observer = CallTrace(returnValues={'numerateTerm': 1})
        fields2LuceneDoc.addObserver(observer)
        document = fields2LuceneDoc._createDocument(fields)
        # Compare as sets: field2 occurs twice and order is not asserted here.
        self.assertEqual(
            set(fields),
            set(f.name() for f in document.getFields()))

        field1 = document.getField("field1")
        self.assertEqual('value1', field1.stringValue())
        self.assertTrue(field1.fieldType().indexed())
        self.assertFalse(field1.fieldType().stored())
        self.assertTrue(field1.fieldType().tokenized())

        self.assertEqual(['value2', 'value2.1'], document.getValues('field2'))

        field3 = document.getField("sorted.field3")
        self.assertEqual('value3', field3.stringValue())
        self.assertTrue(field3.fieldType().indexed())
        self.assertFalse(field3.fieldType().stored())
        self.assertFalse(field3.fieldType().tokenized())

        field4 = document.getField("untokenized.field4")
        self.assertEqual('value4', field4.stringValue())
        self.assertTrue(field4.fieldType().indexed())
        self.assertFalse(field4.fieldType().stored())
        self.assertFalse(field4.fieldType().tokenized())

        field5 = document.getField("__key__.field5")
        # Expected 1 rather than 12345: presumably the key value is replaced
        # by the observer's numerateTerm result -- confirm against
        # Fields2LuceneDoc.
        self.assertEqual(1, field5.numericValue().longValue())
        self.assertFalse(field5.fieldType().indexed())
        self.assertFalse(field5.fieldType().stored())
        self.assertTrue(field5.fieldType().tokenized())

        field6 = document.getField("__numeric__.field6")
        # The string "12345" is expected to come back as the number 12345.
        self.assertEqual(12345, field6.numericValue().longValue())
        self.assertFalse(field6.fieldType().indexed())
        self.assertFalse(field6.fieldType().stored())
        self.assertTrue(field6.fieldType().tokenized())
    def testCreateFacet(self):
        """Fields registered as drilldown fields end up as FacetFields.

        Adds every value of every input field, commits, and splits the fields
        of the resulting document into FacetField instances and all others.
        """
        fields = {
            'field1': ['value1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            'untokenized.field5': ['value5', 'value6'],
            'untokenized.field6': ['value5/value6'],
            'untokenized.field7': ['valuex'],
            'untokenized.field8': [['grandparent', 'parent', 'child'],
                                   ['parent2', 'child']]
        }
        # untokenized.field7 is deliberately NOT registered as drilldown field.
        fields2LuceneDoc = Fields2LuceneDoc(
            'tsname',
            fieldRegistry=FieldRegistry(drilldownFields=[
                DrilldownField('untokenized.field4'),
                DrilldownField('untokenized.field5'),
                DrilldownField('untokenized.field6'),
                DrilldownField('untokenized.field8', hierarchical=True),
            ]))
        observer = CallTrace()
        fields2LuceneDoc.addObserver(observer)
        fields2LuceneDoc.ctx.tx = Transaction('tsname')
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        for field, values in fields.items():
            for value in values:
                fields2LuceneDoc.addField(field, value)

        consume(fields2LuceneDoc.commit('unused'))

        document = observer.calledMethods[0].kwargs['document']
        searchFields = [
            f for f in document.getFields() if not FacetField.instance_(f)
        ]
        # NOTE(review): the expected orders below are list comparisons; they
        # presumably depend on the iteration order of the 'fields' dict on the
        # interpreter this test was written for -- confirm.
        self.assertEqual(['field1', 'sorted.field3', 'untokenized.field7'],
                         [f.name() for f in searchFields])

        facetsFields = [
            FacetField.cast_(f) for f in document.getFields()
            if FacetField.instance_(f)
        ]
        self.assertEqual(6, len(facetsFields))
        self.assertEqual([
            ('untokenized.field8', ['grandparent', 'parent', 'child']),
            ('untokenized.field8', ['parent2', 'child']),
            ('untokenized.field6', ['value5/value6']),
            ('untokenized.field4', ['value4']),
            ('untokenized.field5', ['value5']),
            ('untokenized.field5', ['value6']),
        ], [(f.dim, list(f.path))
            for f in facetsFields])  # Note: a FacetField doesn't have a name
    def testCreateDocument(self):
        """_createFields converts a name -> values mapping into field dicts.

        Every value yields one dict with 'name', 'type' and 'value'; the
        'sorted.' field additionally carries 'sort': True.
        """
        inputFields = {
            'field1': ['value1'],
            'field2': ['value2', 'value2.1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            '__key__.field5': [12345],
            '__numeric__.field6': [12345],
        }
        converter = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
        createdFields = converter._createFields(inputFields)

        # NOTE(review): this order differs from the literal order above; it
        # presumably mirrors the dict iteration order of the interpreter the
        # test was written for -- confirm.
        expectedFields = [
            {"name": "field2", "type": "TextField", "value": "value2"},
            {"name": "field2", "type": "TextField", "value": "value2.1"},
            {"name": "__key__.field5", "type": "KeyField", "value": 12345},
            {"name": "field1", "type": "TextField", "value": "value1"},
            {"name": "sorted.field3", "type": "StringField", "value": "value3", "sort": True},
            {"name": "__numeric__.field6", "type": "NumericField", "value": 12345},
            {"name": "untokenized.field4", "type": "StringField", "value": "value4"},
        ]
        self.assertEqual(expectedFields, createdFields)
 def testOnlyOneSortValueAllowed(self):
     """Adding two values for the same 'sorted.' field keeps only the first.

     After commit, the observer must have received exactly one field dict,
     carrying 'value1'.
     """
     fields2LuceneDoc = Fields2LuceneDoc('tsname',
         fieldRegistry=FieldRegistry()
     )
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     fields2LuceneDoc.addField('sorted.field', 'value1')
     fields2LuceneDoc.addField('sorted.field', 'value2')
     consume(fields2LuceneDoc.commit('unused'))
     fields = observer.calledMethods[0].kwargs['fields']
     # assertEqual throughout; the original mixed it with the deprecated alias.
     self.assertEqual(1, len(fields))
     self.assertEqual({'sort': True, 'type': 'StringField', 'name': 'sorted.field', 'value': 'value1'}, fields[0])
 def testRewriteFields(self):
     """A rewriteFields callable can add fields before the document is built.

     The callable used here adds a 'keys' entry holding the sorted names of
     the fields collected so far; the test checks that this extra field and
     its values reach the observer.
     """
     def rewriteFields(fields):
         # sorted() already returns a new list of the dict's keys.
         fields['keys'] = sorted(fields)
         return fields
     fields2LuceneDoc = Fields2LuceneDoc('tsname', rewriteFields=rewriteFields, fieldRegistry=FieldRegistry())
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     fields2LuceneDoc.addField('field1', 'value1')
     fields2LuceneDoc.addField('field2', 'value2')
     consume(fields2LuceneDoc.commit('unused'))
     self.assertEqual(['addDocument'], observer.calledMethodNames())
     fields = observer.calledMethods[0].kwargs['fields']
     self.assertEqual(set(['field1', 'field2', 'keys']), set(f['name'] for f in fields))
     # One field dict per entry of the 'keys' list is expected.
     self.assertEqual(['field1', 'field2'], [f['value'] for f in fields if f['name'] == 'keys'])
 def testAddFacetField(self):
     """addFacetField on a registered drilldown field yields one facet field.

     Facet fields are recognised in the committed 'fields' list by the
     presence of a "path" key.
     """
     fields2LuceneDoc = Fields2LuceneDoc('tsname',
         fieldRegistry=FieldRegistry(drilldownFields=[
             DrilldownField('untokenized.field'),
         ])
     )
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     fields2LuceneDoc.addField('field', 'value')
     fields2LuceneDoc.addFacetField('untokenized.field', 'untokenized value')
     consume(fields2LuceneDoc.commit('unused'))
     fields = observer.calledMethods[0].kwargs['fields']
     facetsFields = [f for f in fields if "path" in f]
     self.assertEqual(1, len(facetsFields))
    def testCreateFacet(self):
        """Fields registered as drilldown fields are emitted as facet dicts.

        Adds every value of every input field, commits, and splits the
        committed 'fields' list into facet dicts (those with a "path" key)
        and all others.
        """
        fields = {
            'field1': ['value1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            'untokenized.field5': ['value5', 'value6'],
            'untokenized.field6': ['value5/value6'],
            'untokenized.field7': ['valuex'],
            'untokenized.field8': [['grandparent', 'parent', 'child'], ['parent2', 'child']]
        }
        # untokenized.field7 is deliberately NOT registered as drilldown field.
        fields2LuceneDoc = Fields2LuceneDoc('tsname',
            fieldRegistry=FieldRegistry(drilldownFields=[
                DrilldownField('untokenized.field4'),
                DrilldownField('untokenized.field5'),
                DrilldownField('untokenized.field6'),
                DrilldownField('untokenized.field8', hierarchical=True),
            ])
        )
        observer = CallTrace()
        fields2LuceneDoc.addObserver(observer)
        fields2LuceneDoc.ctx.tx = Transaction('tsname')
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        for field, values in fields.items():
            for value in values:
                fields2LuceneDoc.addField(field, value)

        consume(fields2LuceneDoc.commit('unused'))

        # From here on 'fields' is the list of field dicts given to the observer.
        fields = observer.calledMethods[0].kwargs['fields']

        # NOTE(review): the expected orders below are list comparisons; they
        # presumably depend on the iteration order of the input dict on the
        # interpreter this test was written for -- confirm.
        searchFields = [f for f in fields if "path" not in f]
        self.assertEqual(['field1', 'sorted.field3', 'untokenized.field7'], [f['name'] for f in searchFields])

        facetsFields = [f for f in fields if "path" in f]
        self.assertEqual(6, len(facetsFields))
        self.assertEqual([
                ('untokenized.field8', ['grandparent', 'parent', 'child']),
                ('untokenized.field8', ['parent2', 'child']),
                ('untokenized.field6', ['value5/value6']),
                ('untokenized.field4', ['value4']),
                ('untokenized.field5', ['value5']),
                ('untokenized.field5', ['value6']),
            ], [(f['name'], f['path']) for f in facetsFields])
 def testAddFacetField(self):
     """addFacetField on a registered drilldown field yields one FacetField.

     The committed document is inspected for fields that are FacetField
     instances; exactly one is expected.
     """
     fields2LuceneDoc = Fields2LuceneDoc(
         'tsname',
         fieldRegistry=FieldRegistry(drilldownFields=[
             DrilldownField('untokenized.field'),
         ]))
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     fields2LuceneDoc.addField('field', 'value')
     fields2LuceneDoc.addFacetField('untokenized.field',
                                    'untokenized value')
     consume(fields2LuceneDoc.commit('unused'))
     document = observer.calledMethods[0].kwargs['document']
     facetsFields = [
         FacetField.cast_(f) for f in document.getFields()
         if FacetField.instance_(f)
     ]
     self.assertEqual(1, len(facetsFields))
Exemplo n.º 10
0
def uploadHelix(lucene, termNumerator, storageComponent, drilldownFields,
                fieldRegistry):
    """Return the nested-tuple component tree used for record uploads.

    The tree is rooted at SruRecordUpdate and wraps everything in a
    TransactionScope named 'record'. Each tuple is (component, *children).

    lucene, termNumerator and storageComponent are placed in the tree as
    given; fieldRegistry is passed to Fields2LuceneDoc. drilldownFields is
    accepted but not used inside this function.
    """
    # Shared indexing sub-tree: Fields2LuceneDoc with termNumerator and lucene
    # beneath it. The very same tuple object is referenced four times below.
    indexHelix = (Fields2LuceneDoc('record', fieldRegistry=fieldRegistry),
                  (termNumerator, ), (lucene, ))

    return \
    (SruRecordUpdate(),
        (TransactionScope('record'),
            (Venturi(should=[{'partname': 'record', 'xpath': '.'}], namespaces={'doc': 'http://meresco.org/namespace/example'}),
                # 'delete' branch: goes to both lucene and the storage component.
                (FilterMessages(allowed=['delete']),
                    (lucene,),
                    (storageComponent,)
                ),
                # 'add' branch: turn the record into fields and index them.
                (FilterMessages(allowed=['add']),
                    (Xml2Fields(),
                        # Keep only the part of the name after the first '.'
                        # (the whole name when it contains no dot).
                        (RenameField(lambda name: name.split('.', 1)[-1]),
                            # Predicate is true for names without 'fieldHier'.
                            (FilterField(lambda name: 'fieldHier' not in name),
                                indexHelix,
                            ),
                            # 'intfield1' is additionally indexed under SORTED_PREFIX.
                            (FilterField(lambda name: name == 'intfield1'),
                                (RenameField(lambda name: SORTED_PREFIX + name),
                                    indexHelix,
                                )
                            ),
                            # 'field2' and 'field3' are additionally indexed
                            # under UNTOKENIZED_PREFIX.
                            (FilterField(lambda name: name in ['field2', 'field3']),
                                (RenameField(lambda name: UNTOKENIZED_PREFIX + name),
                                    indexHelix,
                                )
                            ),
                        )
                    ),
                    # FieldHier feeds the shared indexing sub-tree directly.
                    (FieldHier(),
                        indexHelix,
                    )
                ),
                # Serialises the 'lxmlNode' kwarg into 'data' for storage;
                # this branch sits outside both FilterMessages filters.
                (XmlPrintLxml(fromKwarg='lxmlNode', toKwarg='data'),
                    (storageComponent,)
                )
            )
        )
    )