def _fixture_setup(self):
    """Load eXist-db fixtures declared on the test case, then defer to the parent.

    When the test case has an ``exist_fixtures`` attribute, it is treated
    as a dictionary and these optional keys are processed in order:

    - ``index``: path of an index configuration file, loaded for the
      collection named by ``settings.EXISTDB_ROOT_COLLECTION``
    - ``directory``: path of a directory; each ``*.xml`` file directly
      inside it is passed to ``_load_file_to_exist``
    - ``files``: iterable of file paths, each passed to
      ``_load_file_to_exist``

    Returns the result of the parent class's ``_fixture_setup``.
    """
    if hasattr(self, 'exist_fixtures'):
        db = ExistDB()
        # load index first; presumably so the documents loaded below are
        # indexed as they are added -- confirm against eXist behavior
        if 'index' in self.exist_fixtures:
            # close the index file as soon as it has been loaded rather
            # than leaving the open handle to the garbage collector
            with open(self.exist_fixtures['index']) as index_file:
                db.loadCollectionIndex(settings.EXISTDB_ROOT_COLLECTION,
                                       index_file)
        if 'directory' in self.exist_fixtures:
            xml_pattern = path.join(self.exist_fixtures['directory'], '*.xml')
            for filename in glob(xml_pattern):
                self._load_file_to_exist(filename)
        if 'files' in self.exist_fixtures:
            for filename in self.exist_fixtures['files']:
                self._load_file_to_exist(filename)

    return super(TestCase, self)._fixture_setup()
def _fixture_setup(self):
    """Load any eXist-db fixtures configured on this test case.

    Looks for an ``exist_fixtures`` attribute on the test case and, if
    present, handles these optional dictionary keys in order:

    - ``index``: path of an index configuration file, loaded for the
      collection named by ``settings.EXISTDB_ROOT_COLLECTION``
    - ``directory``: path of a directory whose ``*.xml`` files are each
      passed to ``_load_file_to_exist``
    - ``files``: iterable of file paths, each passed to
      ``_load_file_to_exist``

    Always finishes by calling the parent class's ``_fixture_setup`` and
    returning its result.
    """
    if hasattr(self, 'exist_fixtures'):
        fixtures = self.exist_fixtures
        db = ExistDB()

        # load index
        if 'index' in fixtures:
            # the context manager guarantees the index file is closed,
            # even if loading it raises
            with open(fixtures['index']) as index_file:
                db.loadCollectionIndex(settings.EXISTDB_ROOT_COLLECTION,
                                       index_file)

        # NOTE: loop variables are named ``fixture_path`` rather than
        # ``file`` so the Python 2 builtin of that name is not shadowed
        if 'directory' in fixtures:
            for fixture_path in glob(path.join(fixtures['directory'], '*.xml')):
                self._load_file_to_exist(fixture_path)

        if 'files' in fixtures:
            for fixture_path in fixtures['files']:
                self._load_file_to_exist(fixture_path)

    return super(TestCase, self)._fixture_setup()
class Command(BaseCommand):
    """Management command for the eXist-db collection index configuration.

    The first positional argument selects one of the subcommands in
    ``arg_list``.  All subcommands operate on the collection named by
    ``settings.EXISTDB_ROOT_COLLECTION``; ``load-index`` reads the file
    named by ``settings.EXISTDB_INDEX_CONFIGFILE``.  ``--username`` and
    ``--password`` may be used to override the eXist credentials used
    when connecting.
    """

    # NOTE(review): as written here the help text contains no line
    # breaks; confirm whether the subcommands were meant to be listed
    # one per line.
    help = (
        "Tasks for managing eXist-db index configuration file. "
        "Available subcommands: "
        "load-index - load index configuration file to eXist "
        "show-index - show the contents of index configuration file "
        "currently in eXist "
        "index-info - show information about index configuration file "
        "in eXist (owner, date modified, etc.) "
        "remove-index - remove index configuration from eXist "
        "reindex - reindex the configured eXist collection with the "
        "loaded index "
    )

    # recognized subcommands; anything else is rejected by handle()
    arg_list = ['load-index', 'show-index', 'index-info', 'remove-index',
                'reindex']

    args = ' | '.join(arg_list)

    def get_password_option(option, opt, value, parser):
        """optparse callback that prompts for a password.

        Deliberately takes no ``self``: it is referenced as a plain
        function in the class body when building ``option_list``.  The
        value read by ``getpass()`` is stored on the parser values
        under the option's ``dest``.
        """
        setattr(parser.values, option.dest, getpass())

    option_list = BaseCommand.option_list + (
        make_option('--username', '-u',
            dest='username',
            action='store',
            help='''Username to use when connecting to eXist (overrides any in local settings)'''),
        make_option('--password', '-p',
            dest='password',
            action='callback', callback=get_password_option,
            help='''Prompt for password (required when --username is specified)'''),
    )

    # FIXME/TODO: possibly convert into a django LabelCommand

    def handle(self, *args, **options):
        """Run the subcommand named by the first positional argument.

        With no arguments or ``help``, prints the help text.  An
        unrecognized subcommand prints a message and the help text.
        Neither case raises.

        Raises ``CommandError`` when a required setting is missing or
        empty, when the collection has no index configuration (for any
        subcommand other than ``load-index``), when loading or removing
        the index fails, when the collection to reindex does not exist,
        or when any other exception occurs while talking to eXist.  A
        failed reindex is only reported on stdout; it does not raise.
        """
        if not args or args[0] == 'help':
            print(self.help)
            return

        cmd = args[0]
        if cmd not in self.arg_list:
            print("Command '%s' not recognized" % cmd)
            print(self.help)
            return

        # check for required settings (used in all modes); a setting
        # that is absent and one that is present but empty are treated
        # the same way
        if not getattr(settings, 'EXISTDB_ROOT_COLLECTION', None):
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
        if not getattr(settings, 'EXISTDB_INDEX_CONFIGFILE', None):
            raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing")

        collection = settings.EXISTDB_ROOT_COLLECTION
        index = settings.EXISTDB_INDEX_CONFIGFILE

        # settings overrides for any credentials given on the command line
        credentials = {}
        if options.get('username') is not None:
            credentials['EXISTDB_SERVER_USER'] = options.get('username')
        if options.get('password') is not None:
            credentials['EXISTDB_SERVER_PASSWORD'] = options.get('password')

        try:
            # Explicitly request no timeout (even if one is configured
            # in django settings), since some tasks (such as
            # reindexing) could take a while.
            if credentials:
                # NOTE: override_settings is a test utility, but this is
                # currently the simplest way to specify credentials:
                # ExistDB is constructed here without explicit
                # credentials (presumably it reads them from django
                # settings -- confirm against ExistDB).
                with override_settings(**credentials):
                    self.db = ExistDB(timeout=None)
            else:
                self.db = ExistDB(timeout=None)

            # check there is already an index config
            hasindex = self.db.hasCollectionIndex(collection)

            # for all commands but load, nothing to do if config
            # collection does not exist
            if not hasindex and cmd != 'load-index':
                raise CommandError("Collection %s has no index configuration" % collection)

            if cmd == 'load-index':
                # load collection index to eXist

                # no easy way to check if index is different, but give
                # some info to user to help indicate
                if hasindex:
                    index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                    print("Collection already has an index configuration; last modified %s\n" % index_desc['modified'])
                else:
                    print("This appears to be a new index configuration\n")

                message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index)

                # close the configuration file once it has been loaded
                with open(index) as index_file:
                    success = self.db.loadCollectionIndex(collection, index_file)
                if success:
                    print("Succesfully updated %s" % message)
                    print(" If your collection already contains data and the index configuration is new or has changed, you should reindex the collection. \n")
                else:
                    raise CommandError("Failed to update %s" % message)

            elif cmd == 'show-index':
                # show the contents of the the collection index config file in exist
                print(self.db.getDoc(self.db._collectionIndexPath(collection)))

            elif cmd == 'index-info':
                # show information about the collection index config file in exist
                index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                for field, val in index_desc.items():
                    print("%s:\t%s" % (field, val))

            elif cmd == 'remove-index':
                # remove any collection index in eXist
                if self.db.removeCollectionIndex(collection):
                    print("Removed collection index configuration for %s" % collection)
                else:
                    raise CommandError("Failed to remove collection index configuration for %s" % collection)

            elif cmd == 'reindex':
                # reindex the collection
                if not self.db.hasCollection(collection):
                    raise CommandError("Collection %s does not exist" % collection)

                print("Reindexing collection %s" % collection)
                print("-- If you have a large collection, this may take a while.")
                start_time = time.time()
                success = self.db.reindexCollection(collection)
                end_time = time.time()
                if success:
                    print("Successfully reindexed collection %s" % collection)
                    print("Reindexing took %.2f seconds" % (end_time - start_time))
                else:
                    # reported on stdout only; no CommandError is raised here
                    print("Failed to reindexed collection %s" % collection)
                    print("-- Check that the configured exist user is in the exist DBA group or specify different credentials.")

        except CommandError:
            # errors raised deliberately above already carry a usable
            # message; let them through instead of re-wrapping them
            raise
        except Exception as err:
            # better error messages would be nice...
            raise CommandError(err)
class ExistQueryTest__FullText(unittest.TestCase):
    """QuerySet tests that need a full-text (lucene) index in eXist."""

    # when full-text indexing is enabled, eXist must index files when
    # they are loaded to the db; this makes tests *significantly* slower.
    # any tests that require full-text queries should be here

    # sample lucene configuration for testing full-text queries
    FIXTURE_INDEX = (
        ' <collection xmlns="http://exist-db.org/collection-config/1.0">'
        ' <index>'
        ' <lucene>'
        ' <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>'
        ' <text qname="description"/>'
        ' <text qname="root"/>'
        ' </lucene>'
        ' </index>'
        ' </collection> '
    )

    def setUp(self):
        """Load the index configuration, then the fixtures, and build a base QuerySet."""
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL)
        # create index for collection - should be applied to newly loaded files
        self.db.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX)
        load_fixtures(self.db)

        self.qs = QuerySet(using=self.db, xpath='/root',
                           collection=COLLECTION, model=QueryTestModel)

    def tearDown(self):
        """Remove the test collection and its index configuration."""
        self.db.removeCollection(COLLECTION)
        self.db.removeCollectionIndex(COLLECTION)

    def test_filter_fulltext_terms(self):
        fqs = self.qs.filter(description__fulltext_terms='only two')
        # evaluate the count once and reuse it in the failure message
        match_count = fqs.count()
        self.assertEqual(1, match_count,
            "should get 1 match for fulltext_terms search on = 'only two' (got %s)" % match_count)

    def test_filter_fulltext_options(self):
        qs = QuerySet(using=self.db, xpath='/root',
                      collection=COLLECTION, model=QueryTestModel,
                      fulltext_options={'default-operator': 'and'})
        # search for terms present in fixtures - but not both present in one doc
        fqs = qs.filter(description__fulltext_terms='only third')
        # for now, just confirm that the option is passed through to query
        self.assertTrue('<default-operator>and</default-operator>' in fqs.query.getQuery())
        # TODO: test this properly!
        # query options not supported in current version of eXist
        # self.assertEqual(0, fqs.count())

    def test_order_by__fulltext_score(self):
        fqs = self.qs.filter(description__fulltext_terms='one').order_by('-fulltext_score')
        self.assertEqual('one', fqs[0].name)    # one appears 3 times, should be first

    def test_only__fulltext_score(self):
        fqs = self.qs.filter(description__fulltext_terms='one').only('fulltext_score', 'name')
        self.assertTrue(isinstance(fqs[0], QueryTestModel))  # actually a Partial type derived from this
        # fulltext score attribute should be present
        self.assertNotEqual(fqs[0].fulltext_score, None)
        self.assertTrue(float(fqs[0].fulltext_score) > 0.5)    # full-text score should be a float

    def test_fulltext_highlight(self):
        fqs = self.qs.filter(description__fulltext_terms='only two')
        # result from fulltext search - by default, xml should have exist:match tags
        self.assertTrue('<exist:match' in fqs[0].serialize())

        fqs = self.qs.filter(description__fulltext_terms='only two', highlight=False)
        # with highlighting disabled, should not have exist:match tags
        self.assertTrue('<exist:match' not in fqs[0].serialize())

        # order of args in the same filter should not matter
        fqs = self.qs.filter(highlight=False, description__fulltext_terms='only two')
        # with highlighting disabled, should not have exist:match tags
        self.assertTrue('<exist:match' not in fqs[0].serialize())

        # separate filters should also work
        fqs = self.qs.filter(description__fulltext_terms='only two').filter(highlight=False)
        # with highlighting disabled, should not have exist:match tags
        self.assertTrue('<exist:match' not in fqs[0].serialize())

    def test_highlight(self):
        fqs = self.qs.filter(highlight='supercalifragilistic')
        self.assertEqual(4, fqs.count(),
            "highlight filter returns all documents even though search term is not present")

        fqs = self.qs.filter(highlight='one').order_by('id')
        self.assertTrue('<exist:match' in fqs[0].serialize())

    def test_match_count(self):
        fqs = self.qs.filter(id='one', highlight='one').only('match_count')
        self.assertEqual(fqs[0].match_count, 4, "4 matched words should be found")

    def test_using(self):
        fqs = self.qs.using('new-collection')
        # using should update the collection on the xquery object
        self.assertEqual('new-collection', fqs.query.collection)
class Command(BaseCommand):
    """Management command for the eXist-db collection index configuration.

    The first positional argument selects one of the subcommands in
    ``arg_list``.  All subcommands operate on the collection named by
    ``settings.EXISTDB_ROOT_COLLECTION``; ``load-index`` reads the file
    named by ``settings.EXISTDB_INDEX_CONFIGFILE``.
    """

    # NOTE(review): as written here the help text contains no line
    # breaks; confirm whether the subcommands were meant to be listed
    # one per line.
    help = (
        "Tasks for managing eXist-db index configuration file. "
        "Available subcommands: "
        "load-index - load index configuration file to eXist "
        "show-index - show the contents of index configuration file "
        "currently in eXist "
        "index-info - show information about index configuration file "
        "in eXist (owner, date modified, etc.) "
        "remove-index - remove index configuration from eXist "
        "reindex - reindex the configured eXist collection with the "
        "loaded index "
    )

    # recognized subcommands; anything else is rejected by handle()
    arg_list = ['load-index', 'show-index', 'index-info', 'remove-index',
                'reindex']

    args = ' | '.join(arg_list)

    # FIXME/TODO: possibly convert into a django LabelCommand

    def handle(self, *args, **options):
        """Run the subcommand named by the first positional argument.

        With no arguments or ``help``, prints the help text.  An
        unrecognized subcommand prints a message and the help text.
        Neither case raises.

        Raises ``CommandError`` when a required setting is missing or
        empty, when the collection has no index configuration (for any
        subcommand other than ``load-index``), when loading or removing
        the index fails, when the collection to reindex does not exist,
        or when any other exception occurs while talking to eXist.  A
        failed reindex is only reported on stdout; it does not raise.
        """
        if not args or args[0] == 'help':
            print(self.help)
            return

        cmd = args[0]
        if cmd not in self.arg_list:
            print("Command '%s' not recognized" % cmd)
            print(self.help)
            return

        # check for required settings (used in all modes); a setting
        # that is absent and one that is present but empty are treated
        # the same way
        if not getattr(settings, 'EXISTDB_ROOT_COLLECTION', None):
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
        if not getattr(settings, 'EXISTDB_INDEX_CONFIGFILE', None):
            raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing")

        collection = settings.EXISTDB_ROOT_COLLECTION
        index = settings.EXISTDB_INDEX_CONFIGFILE

        try:
            # Explicitly request no timeout (even if one is configured
            # in django settings), since some tasks (such as
            # reindexing) could take a while.
            self.db = ExistDB(timeout=None)

            # check there is already an index config
            hasindex = self.db.hasCollectionIndex(collection)

            # for all commands but load, nothing to do if config
            # collection does not exist
            if not hasindex and cmd != 'load-index':
                raise CommandError("Collection %s has no index configuration" % collection)

            if cmd == 'load-index':
                # load collection index to eXist

                # no easy way to check if index is different, but give
                # some info to user to help indicate
                if hasindex:
                    index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                    print("Collection already has an index configuration; last modified %s\n" % index_desc['modified'])
                else:
                    print("This appears to be a new index configuration\n")

                message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index)

                # close the configuration file once it has been loaded
                with open(index) as index_file:
                    success = self.db.loadCollectionIndex(collection, index_file)
                if success:
                    print("Succesfully updated %s" % message)
                    print(" If your collection already contains data and the index configuration is new or has changed, you should reindex the collection. \n")
                else:
                    raise CommandError("Failed to update %s" % message)

            elif cmd == 'show-index':
                # show the contents of the the collection index config file in exist
                print(self.db.getDoc(self.db._collectionIndexPath(collection)))

            elif cmd == 'index-info':
                # show information about the collection index config file in exist
                index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                for field, val in index_desc.items():
                    print("%s:\t%s" % (field, val))

            elif cmd == 'remove-index':
                # remove any collection index in eXist
                if self.db.removeCollectionIndex(collection):
                    print("Removed collection index configuration for %s" % collection)
                else:
                    raise CommandError("Failed to remove collection index configuration for %s" % collection)

            elif cmd == 'reindex':
                # reindex the collection
                if not self.db.hasCollection(collection):
                    raise CommandError("Collection %s does not exist" % collection)

                print("Reindexing collection %s" % collection)
                print("-- If you have a large collection, this may take a while.")
                start_time = time.time()
                success = self.db.reindexCollection(collection)
                end_time = time.time()
                if success:
                    print("Successfully reindexed collection %s" % collection)
                    print("Reindexing took %.2f seconds" % (end_time - start_time))
                else:
                    # reported on stdout only; no CommandError is raised here
                    print("Failed to reindexed collection %s" % collection)
                    print("-- Check that the configured exist user is in the exist DBA group.")

        except CommandError:
            # errors raised deliberately above already carry a usable
            # message; let them through instead of re-wrapping them
            raise
        except Exception as err:
            # better error messages would be nice...
            raise CommandError(err)